mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
AMDGPU: Don't run mode switches with exec 0
These are scalar instructions that change vector instructions, so they should not be executed without any active lanes. The implementation of -amdgpu-skip-threshold also seems to be backwards from expected, since decreasing it prevents removal.
This commit is contained in:
parent
22e9592a06
commit
58874f0270
@ -3011,6 +3011,20 @@ bool SIInstrInfo::isAlwaysGDS(uint16_t Opcode) const {
|
||||
Opcode == AMDGPU::DS_GWS_BARRIER;
|
||||
}
|
||||
|
||||
bool SIInstrInfo::modifiesModeRegister(const MachineInstr &MI) {
|
||||
// Skip the full operand and register alias search modifiesRegister
|
||||
// does. There's only a handful of instructions that touch this, it's only an
|
||||
// implicit def, and doesn't alias any other registers.
|
||||
if (const MCPhysReg *ImpDef = MI.getDesc().getImplicitDefs()) {
|
||||
for (; ImpDef && *ImpDef; ++ImpDef) {
|
||||
if (*ImpDef == AMDGPU::MODE)
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const {
|
||||
unsigned Opcode = MI.getOpcode();
|
||||
|
||||
@ -3036,6 +3050,10 @@ bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
|
||||
if (MI.isCall() || MI.isInlineAsm())
|
||||
return true; // conservative assumption
|
||||
|
||||
// A mode change is a scalar operation that influences vector instructions.
|
||||
if (modifiesModeRegister(MI))
|
||||
return true;
|
||||
|
||||
// These are like SALU instructions in terms of effects, so it's questionable
|
||||
// whether we should return true for those.
|
||||
//
|
||||
|
@ -691,6 +691,9 @@ public:
|
||||
return MO.isReg() && RI.isVGPR(MRI, MO.getReg());});
|
||||
}
|
||||
|
||||
/// Return true if the instruction modifies the mode register.
|
||||
static bool modifiesModeRegister(const MachineInstr &MI);
|
||||
|
||||
/// Whether we must prevent this instruction from executing with EXEC = 0.
|
||||
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;
|
||||
|
||||
|
111
test/CodeGen/AMDGPU/remove-short-exec-branches-mode-def.mir
Normal file
111
test/CodeGen/AMDGPU/remove-short-exec-branches-mode-def.mir
Normal file
@ -0,0 +1,111 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-remove-short-exec-branches -amdgpu-skip-threshold=10 -verify-machineinstrs %s -o - | FileCheck %s
|
||||
# Make sure mandatory skips are not removed around mode defs.
|
||||
# FIXME: -amdgpu-skip-threshold seems to be backwards.
|
||||
|
||||
---
|
||||
|
||||
name: need_skip_setreg_imm32_b32
|
||||
body: |
|
||||
; CHECK-LABEL: name: need_skip_setreg_imm32_b32
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK: S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode
|
||||
; CHECK: bb.2:
|
||||
; CHECK: S_ENDPGM 0
|
||||
bb.0:
|
||||
successors: %bb.1, %bb.2
|
||||
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
|
||||
bb.1:
|
||||
successors: %bb.2
|
||||
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode
|
||||
|
||||
bb.2:
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: need_skip_setreg_b32
|
||||
body: |
|
||||
; CHECK-LABEL: name: need_skip_setreg_b32
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK: S_SETREG_B32 $sgpr0, 3, implicit-def $mode, implicit $mode
|
||||
; CHECK: bb.2:
|
||||
; CHECK: S_ENDPGM 0
|
||||
bb.0:
|
||||
liveins: $sgpr0
|
||||
successors: %bb.1, %bb.2
|
||||
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
|
||||
bb.1:
|
||||
liveins: $sgpr0
|
||||
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
S_SETREG_B32 $sgpr0, 3, implicit-def $mode, implicit $mode
|
||||
|
||||
bb.2:
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: need_skip_denorm_mode
|
||||
body: |
|
||||
; CHECK-LABEL: name: need_skip_denorm_mode
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK: S_DENORM_MODE 3, implicit-def $mode, implicit $mode
|
||||
; CHECK: bb.2:
|
||||
; CHECK: S_ENDPGM 0
|
||||
bb.0:
|
||||
successors: %bb.1, %bb.2
|
||||
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
|
||||
bb.1:
|
||||
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
S_DENORM_MODE 3, implicit-def $mode, implicit $mode
|
||||
|
||||
bb.2:
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: need_skip_round_mode
|
||||
body: |
|
||||
; CHECK-LABEL: name: need_skip_round_mode
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK: S_ROUND_MODE 3, implicit-def $mode, implicit $mode
|
||||
; CHECK: bb.2:
|
||||
; CHECK: S_ENDPGM 0
|
||||
bb.0:
|
||||
successors: %bb.1, %bb.2
|
||||
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
|
||||
bb.1:
|
||||
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
S_ROUND_MODE 3, implicit-def $mode, implicit $mode
|
||||
|
||||
bb.2:
|
||||
S_ENDPGM 0
|
||||
...
|
@ -486,6 +486,24 @@ latch:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}skip_mode_switch:
|
||||
; CHECK: s_and_saveexec_b64
|
||||
; CHECK-NEXT: s_cbranch_execz
|
||||
; CHECK: s_setreg_imm32
|
||||
; CHECK: s_or_b64 exec, exec
|
||||
define void @skip_mode_switch(i32 %arg) {
|
||||
entry:
|
||||
%cmp = icmp eq i32 %arg, 0
|
||||
br i1 %cmp, label %bb.0, label %bb.1
|
||||
|
||||
bb.0:
|
||||
call void @llvm.amdgcn.s.setreg(i32 2049, i32 3)
|
||||
br label %bb.1
|
||||
|
||||
bb.1:
|
||||
ret void
|
||||
}
|
||||
|
||||
declare float @llvm.amdgcn.interp.p1(float, i32 immarg, i32 immarg, i32) #2
|
||||
declare float @llvm.amdgcn.interp.p2(float, float, i32 immarg, i32 immarg, i32) #2
|
||||
declare void @llvm.amdgcn.exp.compr.v2f16(i32 immarg, i32 immarg, <2 x half>, <2 x half>, i1 immarg, i1 immarg) #3
|
||||
@ -494,6 +512,8 @@ declare float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 immarg, float, flo
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare void @llvm.amdgcn.kill(i1) #0
|
||||
|
||||
declare void @llvm.amdgcn.s.setreg(i32 immarg, i32)
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readonly }
|
||||
attributes #2 = { nounwind readnone speculatable }
|
||||
|
Loading…
Reference in New Issue
Block a user