1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

AMDGPU/SI: Fix threshold calculation for branching when exec is zero

Summary:
When control flow is implemented using the exec mask, the compiler will
insert branch instructions to skip over the masked section when exec is
zero if the section contains more than a certain number of instructions.

The previous code would only count instructions in successor blocks,
and this patch modifies the code to start counting instructions in all
blocks between the start and end of the branch.

Reviewers: nhaehnle, arsenm

Subscribers: arsenm, llvm-commits

Differential Revision: http://reviews.llvm.org/D18282

llvm-svn: 263969
This commit is contained in:
Tom Stellard 2016-03-21 18:56:58 +00:00
parent 0a8f4c6b9a
commit c738ae1559
2 changed files with 39 additions and 3 deletions

View File

@ -130,10 +130,12 @@ bool SILowerControlFlow::shouldSkip(MachineBasicBlock *From,
unsigned NumInstr = 0;
for (MachineBasicBlock *MBB = From; MBB != To && !MBB->succ_empty();
MBB = *MBB->succ_begin()) {
for (MachineFunction::iterator MBBI = MachineFunction::iterator(From),
ToI = MachineFunction::iterator(To); MBBI != ToI; ++MBBI) {
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
MachineBasicBlock &MBB = *MBBI;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
NumInstr < SkipThreshold && I != E; ++I) {
if (I->isBundle() || !I->isBundled()) {

View File

@ -24,5 +24,39 @@ out:
ret void
}
;CHECK-LABEL: {{^}}test2:
;CHECK: s_and_saveexec_b64
;CHECK: s_xor_b64
;CHECK-NEXT: s_cbranch_execz
define void @test2(i32 addrspace(1)* %out, i32 %a, i32 %b) {
main_body:
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%cc = icmp eq i32 %tid, 0
br i1 %cc, label %done1, label %if
if:
%cmp = icmp eq i32 %a, 0
br i1 %cmp, label %done0, label %loop_body
loop_body:
%counter = phi i32 [ 0, %if ], [0, %done0], [ %incr, %loop_body ]
; Prevent the loop from being optimized out
call void asm sideeffect "", "" ()
%incr = add i32 %counter, 1
%lc = icmp sge i32 %incr, 1000
br i1 %lc, label %done1, label %loop_body
done0:
%cmp0 = icmp eq i32 %b, 0
br i1 %cmp0, label %done1, label %loop_body
done1:
ret void
}
declare i32 @llvm.amdgcn.workitem.id.x() #1
attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readonly }