mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 20:23:11 +01:00
[AMDGPU][GlobalISel] Remove redundant cmp when copying constant to vcc
Differential Revision: https://reviews.llvm.org/D95540
This commit is contained in:
parent
2ea9f2deeb
commit
6b0d752b7f
@ -136,20 +136,29 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
|
||||
const TargetRegisterClass *SrcRC
|
||||
= TRI.getConstrainedRegClassForOperand(Src, *MRI);
|
||||
|
||||
+Optional<ValueAndVReg> ConstVal =
|
||||
+getConstantVRegValWithLookThrough(SrcReg, *MRI, true, true);
|
||||
+if (ConstVal) {
|
||||
+unsigned MovOpc =
|
||||
+STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
|
||||
+BuildMI(*BB, &I, DL, TII.get(MovOpc), DstReg)
|
||||
+.addImm(ConstVal->Value.getBoolValue() ? -1 : 0);
|
||||
+} else {
|
||||
Register MaskedReg = MRI->createVirtualRegister(SrcRC);
|
||||
|
||||
// We can't trust the high bits at this point, so clear them.
|
||||
|
||||
// TODO: Skip masking high bits if def is known boolean.
|
||||
|
||||
-unsigned AndOpc = TRI.isSGPRClass(SrcRC) ?
|
||||
-AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
|
||||
+unsigned AndOpc =
|
||||
+TRI.isSGPRClass(SrcRC) ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
|
||||
BuildMI(*BB, &I, DL, TII.get(AndOpc), MaskedReg)
|
||||
.addImm(1)
|
||||
.addReg(SrcReg);
|
||||
BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
|
||||
.addImm(0)
|
||||
.addReg(MaskedReg);
|
||||
+}
|
||||
|
||||
if (!MRI->getRegClassOrNull(SrcReg))
|
||||
MRI->setRegClass(SrcReg, SrcRC);
|
||||
|
@ -219,7 +219,7 @@ define amdgpu_kernel void @break_loop(i32 %arg) {
|
||||
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: v_add_u32_e32 v1, 1, v1
|
||||
; CHECK-NEXT: v_cmp_le_i32_e32 vcc, 0, v1
|
||||
-; CHECK-NEXT: v_cmp_ne_u32_e64 s[2:3], 0, 1
|
||||
+; CHECK-NEXT: s_mov_b64 s[2:3], -1
|
||||
; CHECK-NEXT: s_cbranch_vccnz BB5_1
|
||||
; CHECK-NEXT: ; %bb.3: ; %bb4
|
||||
; CHECK-NEXT: ; in Loop: Header=BB5_2 Depth=1
|
||||
|
@ -321,3 +321,82 @@ body: |
|
||||
S_ENDPGM 0, implicit %0
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: copy_s1_to_vcc
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1
|
||||
|
||||
; WAVE64-LABEL: name: copy_s1_to_vcc
|
||||
; WAVE64: liveins: $sgpr0_sgpr1
|
||||
; WAVE64: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; WAVE64: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY1]], implicit-def $scc
|
||||
; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
|
||||
; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NE_U32_e64_]]
|
||||
; WAVE32-LABEL: name: copy_s1_to_vcc
|
||||
; WAVE32: liveins: $sgpr0_sgpr1
|
||||
; WAVE32: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; WAVE32: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
|
||||
; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY1]], implicit-def $scc
|
||||
; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
|
||||
; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NE_U32_e64_]]
|
||||
%0:sgpr(s64) = COPY $sgpr0_sgpr1
|
||||
%1:sgpr(s1) = G_TRUNC %0(s64)
|
||||
%2:vcc(s1) = COPY %1(s1)
|
||||
S_ENDPGM 0, implicit %2
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: copy_s1_false_to_vcc
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0
|
||||
|
||||
; WAVE64-LABEL: name: copy_s1_false_to_vcc
|
||||
; WAVE64: liveins: $sgpr0
|
||||
; WAVE64: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
|
||||
; WAVE64: S_ENDPGM 0, implicit [[S_MOV_B64_]]
|
||||
; WAVE32-LABEL: name: copy_s1_false_to_vcc
|
||||
; WAVE32: liveins: $sgpr0
|
||||
; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; WAVE32: S_ENDPGM 0, implicit [[S_MOV_B32_]]
|
||||
%0:sgpr(s1) = G_CONSTANT i1 false
|
||||
%1:vcc(s1) = COPY %0(s1)
|
||||
S_ENDPGM 0, implicit %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: copy_s1_true_to_vcc
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0
|
||||
|
||||
; WAVE64-LABEL: name: copy_s1_true_to_vcc
|
||||
; WAVE64: liveins: $sgpr0
|
||||
; WAVE64: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
|
||||
; WAVE64: S_ENDPGM 0, implicit [[S_MOV_B64_]]
|
||||
; WAVE32-LABEL: name: copy_s1_true_to_vcc
|
||||
; WAVE32: liveins: $sgpr0
|
||||
; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
|
||||
; WAVE32: S_ENDPGM 0, implicit [[S_MOV_B32_]]
|
||||
%0:sgpr(s1) = G_CONSTANT i1 true
|
||||
%1:vcc(s1) = COPY %0(s1)
|
||||
S_ENDPGM 0, implicit %1
|
||||
|
||||
...
|
||||
|
@ -672,7 +672,7 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspa
|
||||
; GFX7-NEXT: s_load_dword s2, s[0:1], 0x13
|
||||
; GFX7-NEXT: s_load_dword s3, s[0:1], 0x1c
|
||||
; GFX7-NEXT: s_load_dword s0, s[0:1], 0x25
|
||||
-; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, 0
|
||||
+; GFX7-NEXT: s_mov_b64 vcc, 0
|
||||
; GFX7-NEXT: s_mov_b32 s6, -1
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s2
|
||||
@ -688,7 +688,7 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspa
|
||||
; GFX8-NEXT: s_load_dword s2, s[0:1], 0x4c
|
||||
; GFX8-NEXT: s_load_dword s3, s[0:1], 0x70
|
||||
; GFX8-NEXT: s_load_dword s4, s[0:1], 0x94
|
||||
-; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, 0
|
||||
+; GFX8-NEXT: s_mov_b64 vcc, 0
|
||||
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX8-NEXT: v_mov_b32_e32 v0, s2
|
||||
@ -707,7 +707,7 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspa
|
||||
; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x94
|
||||
; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0x4c
|
||||
; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
|
||||
-; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, 0
|
||||
+; GFX10_W32-NEXT: s_mov_b32 vcc_lo, 0
|
||||
; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5
|
||||
; GFX10_W32-NEXT: v_mov_b32_e32 v1, s6
|
||||
@ -723,7 +723,7 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspa
|
||||
; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x94
|
||||
; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0x4c
|
||||
; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
|
||||
-; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, 0
|
||||
+; GFX10_W64-NEXT: s_mov_b64 vcc, 0
|
||||
; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5
|
||||
; GFX10_W64-NEXT: v_mov_b32_e32 v1, s6
|
||||
@ -743,7 +743,7 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspac
|
||||
; GFX7-NEXT: s_load_dword s2, s[0:1], 0x13
|
||||
; GFX7-NEXT: s_load_dword s3, s[0:1], 0x1c
|
||||
; GFX7-NEXT: s_load_dword s0, s[0:1], 0x25
|
||||
-; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, 1
|
||||
+; GFX7-NEXT: s_mov_b64 vcc, -1
|
||||
; GFX7-NEXT: s_mov_b32 s6, -1
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s2
|
||||
@ -759,7 +759,7 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspac
|
||||
; GFX8-NEXT: s_load_dword s2, s[0:1], 0x4c
|
||||
; GFX8-NEXT: s_load_dword s3, s[0:1], 0x70
|
||||
; GFX8-NEXT: s_load_dword s4, s[0:1], 0x94
|
||||
-; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, 1
|
||||
+; GFX8-NEXT: s_mov_b64 vcc, -1
|
||||
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX8-NEXT: v_mov_b32_e32 v0, s2
|
||||
@ -778,7 +778,7 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspac
|
||||
; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x94
|
||||
; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0x4c
|
||||
; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
|
||||
-; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, 1
|
||||
+; GFX10_W32-NEXT: s_mov_b32 vcc_lo, -1
|
||||
; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5
|
||||
; GFX10_W32-NEXT: v_mov_b32_e32 v1, s6
|
||||
@ -794,7 +794,7 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspac
|
||||
; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x94
|
||||
; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0x4c
|
||||
; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
|
||||
-; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, 1
|
||||
+; GFX10_W64-NEXT: s_mov_b64 vcc, -1
|
||||
; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5
|
||||
; GFX10_W64-NEXT: v_mov_b32_e32 v1, s6
|
||||
|
@ -170,8 +170,7 @@ define void @localize_internal_globals(i1 %cond) {
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
|
||||
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
|
||||
-; GFX9-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
|
||||
-; GFX9-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
|
||||
+; GFX9-NEXT: s_xor_b64 s[4:5], vcc, -1
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[6:7]
|
||||
; GFX9-NEXT: s_cbranch_execz BB2_2
|
||||
|
Loading…
Reference in New Issue
Block a user