2017-08-08 02:47:13 +02:00
|
|
|
//===- SIAnnotateControlFlow.cpp ------------------------------------------===//
|
2012-12-19 23:10:31 +01:00
|
|
|
//
|
2019-01-19 09:50:56 +01:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2012-12-19 23:10:31 +01:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
/// \file
|
|
|
|
/// Annotates the control flow with hardware specific intrinsics.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "AMDGPU.h"
|
2019-06-14 01:47:36 +02:00
|
|
|
#include "AMDGPUSubtarget.h"
|
2013-01-02 11:22:59 +01:00
|
|
|
#include "llvm/ADT/DepthFirstIterator.h"
|
2017-08-08 02:47:13 +02:00
|
|
|
#include "llvm/ADT/STLExtras.h"
|
|
|
|
#include "llvm/ADT/SmallVector.h"
|
2018-08-30 16:21:36 +02:00
|
|
|
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
|
R600/SI: Fix bug from insertion of llvm.SI.end.cf into loop headers
The llvm.SI.end.cf intrinsic is used to mark the end of if-then blocks,
if-then-else blocks, and loops. It is responsible for updating the
exec mask to re-enable threads that had been masked during the preceding
control flow block. For example:
s_mov_b64 exec, 0x3 ; Initial exec mask
s_mov_b64 s[0:1], exec ; Saved exec mask
v_cmpx_gt_u32 exec, s[2:3], v0, 0 ; llvm.SI.if
do_stuff()
s_or_b64 exec, exec, s[0:1] ; llvm.SI.end.cf
The bug fixed by this patch was one where the llvm.SI.end.cf intrinsic
was being inserted into the header of loops. This would happen when
an if block terminated in a loop header and we would end up with
code like this:
s_mov_b64 exec, 0x3 ; Initial exec mask
s_mov_b64 s[0:1], exec ; Saved exec mask
v_cmpx_gt_u32 exec, s[2:3], v0, 0 ; llvm.SI.if
do_stuff()
LOOP: ; Start of loop header
s_or_b64 exec, exec, s[0:1] ; llvm.SI.end.cf <-BUG: The exec mask has the
same value at the beginning of each loop
iteration.
do_stuff();
s_cbranch_execnz LOOP
The fix is to create a new basic block before the loop and insert the
llvm.SI.end.cf there. This way the exec mask is restored before the
start of the loop instead of at the beginning of each iteration.
llvm-svn: 228302
2015-02-05 16:32:15 +01:00
|
|
|
#include "llvm/Analysis/LoopInfo.h"
|
2019-06-14 01:47:36 +02:00
|
|
|
#include "llvm/CodeGen/TargetPassConfig.h"
|
2017-08-08 02:47:13 +02:00
|
|
|
#include "llvm/IR/BasicBlock.h"
|
|
|
|
#include "llvm/IR/CFG.h"
|
|
|
|
#include "llvm/IR/Constant.h"
|
2013-06-07 22:28:43 +02:00
|
|
|
#include "llvm/IR/Constants.h"
|
2017-08-08 02:47:13 +02:00
|
|
|
#include "llvm/IR/DerivedTypes.h"
|
2014-01-13 10:26:24 +01:00
|
|
|
#include "llvm/IR/Dominators.h"
|
2017-08-08 02:47:13 +02:00
|
|
|
#include "llvm/IR/Function.h"
|
|
|
|
#include "llvm/IR/Instruction.h"
|
2013-06-07 22:28:43 +02:00
|
|
|
#include "llvm/IR/Instructions.h"
|
2017-08-08 02:47:13 +02:00
|
|
|
#include "llvm/IR/Intrinsics.h"
|
2013-01-02 12:36:10 +01:00
|
|
|
#include "llvm/IR/Module.h"
|
2017-08-08 02:47:13 +02:00
|
|
|
#include "llvm/IR/Type.h"
|
|
|
|
#include "llvm/IR/ValueHandle.h"
|
Sink all InitializePasses.h includes
This file lists every pass in LLVM, and is included by Pass.h, which is
very popular. Every time we add, remove, or rename a pass in LLVM, it
caused lots of recompilation.
I found this fact by looking at this table, which is sorted by the
number of times a file was changed over the last 100,000 git commits
multiplied by the number of object files that depend on it in the
current checkout:
recompiles touches affected_files header
342380 95 3604 llvm/include/llvm/ADT/STLExtras.h
314730 234 1345 llvm/include/llvm/InitializePasses.h
307036 118 2602 llvm/include/llvm/ADT/APInt.h
213049 59 3611 llvm/include/llvm/Support/MathExtras.h
170422 47 3626 llvm/include/llvm/Support/Compiler.h
162225 45 3605 llvm/include/llvm/ADT/Optional.h
158319 63 2513 llvm/include/llvm/ADT/Triple.h
140322 39 3598 llvm/include/llvm/ADT/StringRef.h
137647 59 2333 llvm/include/llvm/Support/Error.h
131619 73 1803 llvm/include/llvm/Support/FileSystem.h
Before this change, touching InitializePasses.h would cause 1345 files
to recompile. After this change, touching it only causes 550 compiles in
an incremental rebuild.
Reviewers: bkramer, asbirlea, bollu, jdoerfert
Differential Revision: https://reviews.llvm.org/D70211
2019-11-13 22:15:01 +01:00
|
|
|
#include "llvm/InitializePasses.h"
|
2013-01-02 11:22:59 +01:00
|
|
|
#include "llvm/Pass.h"
|
2017-08-08 02:47:13 +02:00
|
|
|
#include "llvm/Support/Casting.h"
|
|
|
|
#include "llvm/Support/Debug.h"
|
|
|
|
#include "llvm/Support/ErrorHandling.h"
|
|
|
|
#include "llvm/Support/raw_ostream.h"
|
2012-12-19 23:10:31 +01:00
|
|
|
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
2019-05-13 20:05:10 +02:00
|
|
|
#include "llvm/Transforms/Utils/Local.h"
|
2017-08-08 02:47:13 +02:00
|
|
|
#include <cassert>
|
|
|
|
#include <utility>
|
2012-12-19 23:10:31 +01:00
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
|
2014-04-22 04:41:26 +02:00
|
|
|
#define DEBUG_TYPE "si-annotate-control-flow"
|
|
|
|
|
2012-12-19 23:10:31 +01:00
|
|
|
namespace {
|
|
|
|
|
|
|
|
// Complex types used in this pass
|
2017-08-08 02:47:13 +02:00
|
|
|
using StackEntry = std::pair<BasicBlock *, Value *>;
|
|
|
|
using StackVector = SmallVector<StackEntry, 16>;
|
2012-12-19 23:10:31 +01:00
|
|
|
|
|
|
|
class SIAnnotateControlFlow : public FunctionPass {
|
2018-08-30 16:21:36 +02:00
|
|
|
LegacyDivergenceAnalysis *DA;
|
2012-12-19 23:10:31 +01:00
|
|
|
|
|
|
|
Type *Boolean;
|
|
|
|
Type *Void;
|
2019-06-14 01:47:36 +02:00
|
|
|
Type *IntMask;
|
2012-12-19 23:10:31 +01:00
|
|
|
Type *ReturnStruct;
|
|
|
|
|
|
|
|
ConstantInt *BoolTrue;
|
|
|
|
ConstantInt *BoolFalse;
|
|
|
|
UndefValue *BoolUndef;
|
2019-06-14 01:47:36 +02:00
|
|
|
Constant *IntMaskZero;
|
2012-12-19 23:10:31 +01:00
|
|
|
|
2017-03-17 21:41:45 +01:00
|
|
|
Function *If;
|
|
|
|
Function *Else;
|
|
|
|
Function *IfBreak;
|
|
|
|
Function *Loop;
|
|
|
|
Function *EndCf;
|
2012-12-19 23:10:31 +01:00
|
|
|
|
|
|
|
DominatorTree *DT;
|
|
|
|
StackVector Stack;
|
|
|
|
|
R600/SI: Fix bug from insertion of llvm.SI.end.cf into loop headers
The llvm.SI.end.cf intrinsic is used to mark the end of if-then blocks,
if-then-else blocks, and loops. It is responsible for updating the
exec mask to re-enable threads that had been masked during the preceding
control flow block. For example:
s_mov_b64 exec, 0x3 ; Initial exec mask
s_mov_b64 s[0:1], exec ; Saved exec mask
v_cmpx_gt_u32 exec, s[2:3], v0, 0 ; llvm.SI.if
do_stuff()
s_or_b64 exec, exec, s[0:1] ; llvm.SI.end.cf
The bug fixed by this patch was one where the llvm.SI.end.cf intrinsic
was being inserted into the header of loops. This would happen when
an if block terminated in a loop header and we would end up with
code like this:
s_mov_b64 exec, 0x3 ; Initial exec mask
s_mov_b64 s[0:1], exec ; Saved exec mask
v_cmpx_gt_u32 exec, s[2:3], v0, 0 ; llvm.SI.if
do_stuff()
LOOP: ; Start of loop header
s_or_b64 exec, exec, s[0:1] ; llvm.SI.end.cf <-BUG: The exec mask has the
same value at the beginning of each loop
iteration.
do_stuff();
s_cbranch_execnz LOOP
The fix is to create a new basic block before the loop and insert the
llvm.SI.end.cf there. This way the exec mask is restored before the
start of the loop instead of at the beginning of each iteration.
llvm-svn: 228302
2015-02-05 16:32:15 +01:00
|
|
|
LoopInfo *LI;
|
|
|
|
|
2019-06-14 01:47:36 +02:00
|
|
|
void initialize(Module &M, const GCNSubtarget &ST);
|
|
|
|
|
2016-04-14 19:42:35 +02:00
|
|
|
bool isUniform(BranchInst *T);
|
|
|
|
|
2012-12-19 23:10:31 +01:00
|
|
|
bool isTopOfStack(BasicBlock *BB);
|
|
|
|
|
|
|
|
Value *popSaved();
|
|
|
|
|
|
|
|
void push(BasicBlock *BB, Value *Saved);
|
|
|
|
|
|
|
|
bool isElse(PHINode *Phi);
|
|
|
|
|
|
|
|
void eraseIfUnused(PHINode *Phi);
|
|
|
|
|
|
|
|
void openIf(BranchInst *Term);
|
|
|
|
|
|
|
|
void insertElse(BranchInst *Term);
|
|
|
|
|
2017-05-01 19:07:49 +02:00
|
|
|
Value *
|
|
|
|
handleLoopCondition(Value *Cond, PHINode *Broken, llvm::Loop *L,
|
AMDGPU: Remove PHI loop condition optimization
Summary:
The optimization to early break out of loops if all threads are dead was
never fully implemented.
But the PHI node analyzing is actually causing a number of problems, so
remove all the extra code for it.
(This does actually regress code quality in a few places because it
ends up relying more heavily on phi's of i1, which we don't do a
great job with. However, since it fixes real bugs in the wild, we
should take this change. I have some prototype changes to improve
i1 lowering in general -- not just for control flow -- which should
help recover the code quality, I just need to make those changes
fit for general consumption. -- Nicolai)
Change-Id: I6fc6c6c8961857ac6009fcfb9f7e5e48dc23fbb1
Patch-by: Christian König <christian.koenig@amd.com>
Reviewers: arsenm, rampitec, tpr
Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D53359
llvm-svn: 345718
2018-10-31 14:26:48 +01:00
|
|
|
BranchInst *Term);
|
2012-12-19 23:10:31 +01:00
|
|
|
|
|
|
|
void handleLoop(BranchInst *Term);
|
|
|
|
|
|
|
|
void closeControlFlow(BasicBlock *BB);
|
|
|
|
|
|
|
|
public:
|
2016-01-20 16:48:27 +01:00
|
|
|
static char ID;
|
|
|
|
|
2017-08-08 02:47:13 +02:00
|
|
|
SIAnnotateControlFlow() : FunctionPass(ID) {}
|
2012-12-19 23:10:31 +01:00
|
|
|
|
2014-04-29 09:57:24 +02:00
|
|
|
bool runOnFunction(Function &F) override;
|
2012-12-19 23:10:31 +01:00
|
|
|
|
2016-10-01 04:56:57 +02:00
|
|
|
StringRef getPassName() const override { return "SI annotate control flow"; }
|
2012-12-19 23:10:31 +01:00
|
|
|
|
2014-04-29 09:57:24 +02:00
|
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
R600/SI: Fix bug from insertion of llvm.SI.end.cf into loop headers
The llvm.SI.end.cf intrinsic is used to mark the end of if-then blocks,
if-then-else blocks, and loops. It is responsible for updating the
exec mask to re-enable threads that had been masked during the preceding
control flow block. For example:
s_mov_b64 exec, 0x3 ; Initial exec mask
s_mov_b64 s[0:1], exec ; Saved exec mask
v_cmpx_gt_u32 exec, s[2:3], v0, 0 ; llvm.SI.if
do_stuff()
s_or_b64 exec, exec, s[0:1] ; llvm.SI.end.cf
The bug fixed by this patch was one where the llvm.SI.end.cf intrinsic
was being inserted into the header of loops. This would happen when
an if block terminated in a loop header and we would end up with
code like this:
s_mov_b64 exec, 0x3 ; Initial exec mask
s_mov_b64 s[0:1], exec ; Saved exec mask
v_cmpx_gt_u32 exec, s[2:3], v0, 0 ; llvm.SI.if
do_stuff()
LOOP: ; Start of loop header
s_or_b64 exec, exec, s[0:1] ; llvm.SI.end.cf <-BUG: The exec mask has the
same value at the beginning of each loop
iteration.
do_stuff();
s_cbranch_execnz LOOP
The fix is to create a new basic block before the loop and insert the
llvm.SI.end.cf there. This way the exec mask is restored before the
start of the loop instead of at the beginning of each iteration.
llvm-svn: 228302
2015-02-05 16:32:15 +01:00
|
|
|
AU.addRequired<LoopInfoWrapperPass>();
|
2014-01-13 14:07:17 +01:00
|
|
|
AU.addRequired<DominatorTreeWrapperPass>();
|
2018-08-30 16:21:36 +02:00
|
|
|
AU.addRequired<LegacyDivergenceAnalysis>();
|
2014-01-13 14:07:17 +01:00
|
|
|
AU.addPreserved<DominatorTreeWrapperPass>();
|
2019-06-14 01:47:36 +02:00
|
|
|
AU.addRequired<TargetPassConfig>();
|
2012-12-19 23:10:31 +01:00
|
|
|
FunctionPass::getAnalysisUsage(AU);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
} // end anonymous namespace
|
|
|
|
|
2016-01-20 16:48:27 +01:00
|
|
|
// Pass registration. Every analysis that getAnalysisUsage() marks as required
// is listed as a dependency so that it is initialized together with this pass.
INITIALIZE_PASS_BEGIN(SIAnnotateControlFlow, DEBUG_TYPE,
                      "Annotate SI Control Flow", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(SIAnnotateControlFlow, DEBUG_TYPE,
                    "Annotate SI Control Flow", false, false)

char SIAnnotateControlFlow::ID = 0;
|
|
|
|
|
2018-05-01 17:54:18 +02:00
|
|
|
/// Initialize all the types and constants used in the pass
|
2019-06-14 01:47:36 +02:00
|
|
|
void SIAnnotateControlFlow::initialize(Module &M, const GCNSubtarget &ST) {
|
2012-12-19 23:10:31 +01:00
|
|
|
LLVMContext &Context = M.getContext();
|
|
|
|
|
|
|
|
Void = Type::getVoidTy(Context);
|
|
|
|
Boolean = Type::getInt1Ty(Context);
|
2019-06-14 01:47:36 +02:00
|
|
|
IntMask = ST.isWave32() ? Type::getInt32Ty(Context)
|
|
|
|
: Type::getInt64Ty(Context);
|
|
|
|
ReturnStruct = StructType::get(Boolean, IntMask);
|
2012-12-19 23:10:31 +01:00
|
|
|
|
|
|
|
BoolTrue = ConstantInt::getTrue(Context);
|
|
|
|
BoolFalse = ConstantInt::getFalse(Context);
|
|
|
|
BoolUndef = UndefValue::get(Boolean);
|
2019-06-14 01:47:36 +02:00
|
|
|
IntMaskZero = ConstantInt::get(IntMask, 0);
|
|
|
|
|
|
|
|
If = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if, { IntMask });
|
|
|
|
Else = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_else,
|
|
|
|
{ IntMask, IntMask });
|
|
|
|
IfBreak = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if_break,
|
2020-02-03 15:07:36 +01:00
|
|
|
{ IntMask });
|
2019-06-14 01:47:36 +02:00
|
|
|
Loop = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_loop, { IntMask });
|
|
|
|
EndCf = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_end_cf, { IntMask });
|
2012-12-19 23:10:31 +01:00
|
|
|
}
|
|
|
|
|
2018-05-01 17:54:18 +02:00
|
|
|
/// Is the branch condition uniform or did the StructurizeCFG pass
|
2016-04-14 19:42:35 +02:00
|
|
|
/// consider it as such?
|
|
|
|
bool SIAnnotateControlFlow::isUniform(BranchInst *T) {
|
AMDGPU: test for uniformity of branch instruction, not its condition
Summary:
If a divergent branch instruction is marked as divergent by propagation
rule 2 in DivergencePropagator::exploreSyncDependency() and its condition
is uniform, that branch would incorrectly be assumed to be uniform.
Reviewers: arsenm, tstellar
Reviewed By: arsenm
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D56331
llvm-svn: 350532
2019-01-07 16:52:28 +01:00
|
|
|
return DA->isUniform(T) ||
|
2016-04-14 19:42:35 +02:00
|
|
|
T->getMetadata("structurizecfg.uniform") != nullptr;
|
|
|
|
}
|
|
|
|
|
2018-05-01 17:54:18 +02:00
|
|
|
/// Is BB the last block saved on the stack ?
|
2012-12-19 23:10:31 +01:00
|
|
|
bool SIAnnotateControlFlow::isTopOfStack(BasicBlock *BB) {
|
2013-02-14 09:00:33 +01:00
|
|
|
return !Stack.empty() && Stack.back().first == BB;
|
2012-12-19 23:10:31 +01:00
|
|
|
}
|
|
|
|
|
2018-05-01 17:54:18 +02:00
|
|
|
/// Pop the last saved value from the control flow stack
|
2012-12-19 23:10:31 +01:00
|
|
|
Value *SIAnnotateControlFlow::popSaved() {
|
|
|
|
return Stack.pop_back_val().second;
|
|
|
|
}
|
|
|
|
|
2018-05-01 17:54:18 +02:00
|
|
|
/// Push a BB and saved value to the control flow stack
|
2012-12-19 23:10:31 +01:00
|
|
|
void SIAnnotateControlFlow::push(BasicBlock *BB, Value *Saved) {
|
|
|
|
Stack.push_back(std::make_pair(BB, Saved));
|
|
|
|
}
|
|
|
|
|
2018-05-01 17:54:18 +02:00
|
|
|
/// Can the condition represented by this PHI node treated like
|
2012-12-19 23:10:31 +01:00
|
|
|
/// an "Else" block?
|
|
|
|
bool SIAnnotateControlFlow::isElse(PHINode *Phi) {
|
|
|
|
BasicBlock *IDom = DT->getNode(Phi->getParent())->getIDom()->getBlock();
|
|
|
|
for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
|
|
|
|
if (Phi->getIncomingBlock(i) == IDom) {
|
|
|
|
|
|
|
|
if (Phi->getIncomingValue(i) != BoolTrue)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
} else {
|
|
|
|
if (Phi->getIncomingValue(i) != BoolFalse)
|
|
|
|
return false;
|
2014-06-20 19:06:02 +02:00
|
|
|
|
2012-12-19 23:10:31 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2018-05-01 17:54:18 +02:00
|
|
|
// Erase "Phi" if it is not used any more
|
2012-12-19 23:10:31 +01:00
|
|
|
void SIAnnotateControlFlow::eraseIfUnused(PHINode *Phi) {
|
2017-08-08 02:47:13 +02:00
|
|
|
if (RecursivelyDeleteDeadPHINode(Phi)) {
|
2018-05-14 14:53:11 +02:00
|
|
|
LLVM_DEBUG(dbgs() << "Erased unused condition phi\n");
|
2017-03-24 21:57:10 +01:00
|
|
|
}
|
2012-12-19 23:10:31 +01:00
|
|
|
}
|
|
|
|
|
2018-05-01 17:54:18 +02:00
|
|
|
/// Open a new "If" block
|
2012-12-19 23:10:31 +01:00
|
|
|
void SIAnnotateControlFlow::openIf(BranchInst *Term) {
|
2017-03-15 19:00:12 +01:00
|
|
|
if (isUniform(Term))
|
2016-02-13 00:45:29 +01:00
|
|
|
return;
|
2017-03-15 19:00:12 +01:00
|
|
|
|
2012-12-19 23:10:31 +01:00
|
|
|
Value *Ret = CallInst::Create(If, Term->getCondition(), "", Term);
|
|
|
|
Term->setCondition(ExtractValueInst::Create(Ret, 0, "", Term));
|
|
|
|
push(Term->getSuccessor(1), ExtractValueInst::Create(Ret, 1, "", Term));
|
|
|
|
}
|
|
|
|
|
2018-05-01 17:54:18 +02:00
|
|
|
/// Close the last "If" block and open a new "Else" block
|
2012-12-19 23:10:31 +01:00
|
|
|
void SIAnnotateControlFlow::insertElse(BranchInst *Term) {
|
2016-04-14 19:42:35 +02:00
|
|
|
if (isUniform(Term)) {
|
2016-02-13 00:45:29 +01:00
|
|
|
return;
|
|
|
|
}
|
2012-12-19 23:10:31 +01:00
|
|
|
Value *Ret = CallInst::Create(Else, popSaved(), "", Term);
|
|
|
|
Term->setCondition(ExtractValueInst::Create(Ret, 0, "", Term));
|
|
|
|
push(Term->getSuccessor(1), ExtractValueInst::Create(Ret, 1, "", Term));
|
|
|
|
}
|
|
|
|
|
2018-05-01 17:54:18 +02:00
|
|
|
/// Recursively handle the condition leading to a loop
|
2017-03-24 21:57:10 +01:00
|
|
|
Value *SIAnnotateControlFlow::handleLoopCondition(
|
AMDGPU: Remove PHI loop condition optimization
Summary:
The optimization to early break out of loops if all threads are dead was
never fully implemented.
But the PHI node analyzing is actually causing a number of problems, so
remove all the extra code for it.
(This does actually regress code quality in a few places because it
ends up relying more heavily on phi's of i1, which we don't do a
great job with. However, since it fixes real bugs in the wild, we
should take this change. I have some prototype changes to improve
i1 lowering in general -- not just for control flow -- which should
help recover the code quality, I just need to make those changes
fit for general consumption. -- Nicolai)
Change-Id: I6fc6c6c8961857ac6009fcfb9f7e5e48dc23fbb1
Patch-by: Christian König <christian.koenig@amd.com>
Reviewers: arsenm, rampitec, tpr
Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D53359
llvm-svn: 345718
2018-10-31 14:26:48 +01:00
|
|
|
Value *Cond, PHINode *Broken, llvm::Loop *L, BranchInst *Term) {
|
2017-03-15 19:00:12 +01:00
|
|
|
if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
|
2012-12-19 23:10:31 +01:00
|
|
|
BasicBlock *Parent = Inst->getParent();
|
2015-04-14 16:36:45 +02:00
|
|
|
Instruction *Insert;
|
|
|
|
if (L->contains(Inst)) {
|
|
|
|
Insert = Parent->getTerminator();
|
|
|
|
} else {
|
|
|
|
Insert = L->getHeader()->getFirstNonPHIOrDbgOrLifetime();
|
|
|
|
}
|
2017-03-15 19:00:12 +01:00
|
|
|
|
2014-06-20 19:06:02 +02:00
|
|
|
Value *Args[] = { Cond, Broken };
|
|
|
|
return CallInst::Create(IfBreak, Args, "", Insert);
|
2017-03-15 19:00:12 +01:00
|
|
|
}
|
2012-12-19 23:10:31 +01:00
|
|
|
|
2017-03-17 21:52:21 +01:00
|
|
|
// Insert IfBreak in the loop header TERM for constant COND other than true.
|
|
|
|
if (isa<Constant>(Cond)) {
|
|
|
|
Instruction *Insert = Cond == BoolTrue ?
|
|
|
|
Term : L->getHeader()->getTerminator();
|
|
|
|
|
2016-02-12 18:11:04 +01:00
|
|
|
Value *Args[] = { Cond, Broken };
|
2017-03-17 21:52:21 +01:00
|
|
|
return CallInst::Create(IfBreak, Args, "", Insert);
|
2012-12-19 23:10:31 +01:00
|
|
|
}
|
2017-03-15 19:00:12 +01:00
|
|
|
|
|
|
|
llvm_unreachable("Unhandled loop condition!");
|
2012-12-19 23:10:31 +01:00
|
|
|
}
|
|
|
|
|
2018-05-01 17:54:18 +02:00
|
|
|
/// Handle a back edge (loop)
|
2012-12-19 23:10:31 +01:00
|
|
|
void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
|
2017-03-15 19:00:12 +01:00
|
|
|
if (isUniform(Term))
|
2016-02-13 00:45:29 +01:00
|
|
|
return;
|
|
|
|
|
2015-04-14 16:36:45 +02:00
|
|
|
BasicBlock *BB = Term->getParent();
|
|
|
|
llvm::Loop *L = LI->getLoopFor(BB);
|
2016-07-29 01:01:45 +02:00
|
|
|
if (!L)
|
|
|
|
return;
|
2017-03-15 19:00:12 +01:00
|
|
|
|
2012-12-19 23:10:31 +01:00
|
|
|
BasicBlock *Target = Term->getSuccessor(1);
|
2019-06-14 01:47:36 +02:00
|
|
|
PHINode *Broken = PHINode::Create(IntMask, 0, "phi.broken", &Target->front());
|
2012-12-19 23:10:31 +01:00
|
|
|
|
|
|
|
Value *Cond = Term->getCondition();
|
|
|
|
Term->setCondition(BoolTrue);
|
AMDGPU: Remove PHI loop condition optimization
Summary:
The optimization to early break out of loops if all threads are dead was
never fully implemented.
But the PHI node analyzing is actually causing a number of problems, so
remove all the extra code for it.
(This does actually regress code quality in a few places because it
ends up relying more heavily on phi's of i1, which we don't do a
great job with. However, since it fixes real bugs in the wild, we
should take this change. I have some prototype changes to improve
i1 lowering in general -- not just for control flow -- which should
help recover the code quality, I just need to make those changes
fit for general consumption. -- Nicolai)
Change-Id: I6fc6c6c8961857ac6009fcfb9f7e5e48dc23fbb1
Patch-by: Christian König <christian.koenig@amd.com>
Reviewers: arsenm, rampitec, tpr
Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D53359
llvm-svn: 345718
2018-10-31 14:26:48 +01:00
|
|
|
Value *Arg = handleLoopCondition(Cond, Broken, L, Term);
|
2012-12-19 23:10:31 +01:00
|
|
|
|
2019-03-15 22:02:48 +01:00
|
|
|
for (BasicBlock *Pred : predecessors(Target)) {
|
2019-06-14 01:47:36 +02:00
|
|
|
Value *PHIValue = IntMaskZero;
|
2019-03-15 22:02:48 +01:00
|
|
|
if (Pred == BB) // Remember the value of the previous iteration.
|
|
|
|
PHIValue = Arg;
|
|
|
|
// If the backedge from Pred to Target could be executed before the exit
|
|
|
|
// of the loop at BB, it should not reset or change "Broken", which keeps
|
|
|
|
// track of the number of threads exited the loop at BB.
|
|
|
|
else if (L->contains(Pred) && DT->dominates(Pred, BB))
|
|
|
|
PHIValue = Broken;
|
|
|
|
Broken->addIncoming(PHIValue, Pred);
|
|
|
|
}
|
2012-12-19 23:10:31 +01:00
|
|
|
|
|
|
|
Term->setCondition(CallInst::Create(Loop, Arg, "", Term));
|
2017-03-24 21:57:10 +01:00
|
|
|
|
2012-12-19 23:10:31 +01:00
|
|
|
push(Term->getSuccessor(0), Arg);
|
2017-03-15 19:00:12 +01:00
|
|
|
}
|
|
|
|
|
2018-05-01 17:54:18 +02:00
|
|
|
/// Close the last opened control flow
|
2012-12-19 23:10:31 +01:00
|
|
|
void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
|
R600/SI: Fix bug from insertion of llvm.SI.end.cf into loop headers
The llvm.SI.end.cf intrinsic is used to mark the end of if-then blocks,
if-then-else blocks, and loops. It is responsible for updating the
exec mask to re-enable threads that had been masked during the preceding
control flow block. For example:
s_mov_b64 exec, 0x3 ; Initial exec mask
s_mov_b64 s[0:1], exec ; Saved exec mask
v_cmpx_gt_u32 exec, s[2:3], v0, 0 ; llvm.SI.if
do_stuff()
s_or_b64 exec, exec, s[0:1] ; llvm.SI.end.cf
The bug fixed by this patch was one where the llvm.SI.end.cf intrinsic
was being inserted into the header of loops. This would happen when
an if block terminated in a loop header and we would end up with
code like this:
s_mov_b64 exec, 0x3 ; Initial exec mask
s_mov_b64 s[0:1], exec ; Saved exec mask
v_cmpx_gt_u32 exec, s[2:3], v0, 0 ; llvm.SI.if
do_stuff()
LOOP: ; Start of loop header
s_or_b64 exec, exec, s[0:1] ; llvm.SI.end.cf <-BUG: The exec mask has the
same value at the beginning of each loop
iteration.
do_stuff();
s_cbranch_execnz LOOP
The fix is to create a new basic block before the loop and insert the
llvm.SI.end.cf there. This way the exec mask is restored before the
start of the loop instead of at the beginning of each iteration.
llvm-svn: 228302
2015-02-05 16:32:15 +01:00
|
|
|
llvm::Loop *L = LI->getLoopFor(BB);
|
|
|
|
|
2016-04-14 19:42:18 +02:00
|
|
|
assert(Stack.back().first == BB);
|
2016-02-13 00:45:29 +01:00
|
|
|
|
R600/SI: Fix bug from insertion of llvm.SI.end.cf into loop headers
The llvm.SI.end.cf intrinsic is used to mark the end of if-then blocks,
if-then-else blocks, and loops. It is responsible for updating the
exec mask to re-enable threads that had been masked during the preceding
control flow block. For example:
s_mov_b64 exec, 0x3 ; Initial exec mask
s_mov_b64 s[0:1], exec ; Saved exec mask
v_cmpx_gt_u32 exec, s[2:3], v0, 0 ; llvm.SI.if
do_stuff()
s_or_b64 exec, exec, s[0:1] ; llvm.SI.end.cf
The bug fixed by this patch was one where the llvm.SI.end.cf intrinsic
was being inserted into the header of loops. This would happen when
an if block terminated in a loop header and we would end up with
code like this:
s_mov_b64 exec, 0x3 ; Initial exec mask
s_mov_b64 s[0:1], exec ; Saved exec mask
v_cmpx_gt_u32 exec, s[2:3], v0, 0 ; llvm.SI.if
do_stuff()
LOOP: ; Start of loop header
s_or_b64 exec, exec, s[0:1] ; llvm.SI.end.cf <-BUG: The exec mask has the
same value at the beginning of each loop
iteration.
do_stuff();
s_cbranch_execnz LOOP
The fix is to create a new basic block before the loop and insert the
llvm.SI.end.cf there. This way the exec mask is restored before the
start of the loop instead of at the beginning of each iteration.
llvm-svn: 228302
2015-02-05 16:32:15 +01:00
|
|
|
if (L && L->getHeader() == BB) {
|
|
|
|
// We can't insert an EndCF call into a loop header, because it will
|
|
|
|
// get executed on every iteration of the loop, when it should be
|
|
|
|
// executed only once before the loop.
|
2017-03-15 19:00:12 +01:00
|
|
|
SmallVector <BasicBlock *, 8> Latches;
|
R600/SI: Fix bug from insertion of llvm.SI.end.cf into loop headers
The llvm.SI.end.cf intrinsic is used to mark the end of if-then blocks,
if-then-else blocks, and loops. It is responsible for updating the
exec mask to re-enable threads that had been masked during the preceding
control flow block. For example:
s_mov_b64 exec, 0x3 ; Initial exec mask
s_mov_b64 s[0:1], exec ; Saved exec mask
v_cmpx_gt_u32 exec, s[2:3], v0, 0 ; llvm.SI.if
do_stuff()
s_or_b64 exec, exec, s[0:1] ; llvm.SI.end.cf
The bug fixed by this patch was one where the llvm.SI.end.cf intrinsic
was being inserted into the header of loops. This would happen when
an if block terminated in a loop header and we would end up with
code like this:
s_mov_b64 exec, 0x3 ; Initial exec mask
s_mov_b64 s[0:1], exec ; Saved exec mask
v_cmpx_gt_u32 exec, s[2:3], v0, 0 ; llvm.SI.if
do_stuff()
LOOP: ; Start of loop header
s_or_b64 exec, exec, s[0:1] ; llvm.SI.end.cf <-BUG: The exec mask has the
same value at the beginning of each loop
iteration.
do_stuff();
s_cbranch_execnz LOOP
The fix is to create a new basic block before the loop and insert the
llvm.SI.end.cf there. This way the exec mask is restored before the
start of the loop instead of at the beginning of each iteration.
llvm-svn: 228302
2015-02-05 16:32:15 +01:00
|
|
|
L->getLoopLatches(Latches);
|
|
|
|
|
2017-03-15 19:00:12 +01:00
|
|
|
SmallVector<BasicBlock *, 2> Preds;
|
|
|
|
for (BasicBlock *Pred : predecessors(BB)) {
|
|
|
|
if (!is_contained(Latches, Pred))
|
|
|
|
Preds.push_back(Pred);
|
R600/SI: Fix bug from insertion of llvm.SI.end.cf into loop headers
The llvm.SI.end.cf intrinsic is used to mark the end of if-then blocks,
if-then-else blocks, and loops. It is responsible for updating the
exec mask to re-enable threads that had been masked during the preceding
control flow block. For example:
s_mov_b64 exec, 0x3 ; Initial exec mask
s_mov_b64 s[0:1], exec ; Saved exec mask
v_cmpx_gt_u32 exec, s[2:3], v0, 0 ; llvm.SI.if
do_stuff()
s_or_b64 exec, exec, s[0:1] ; llvm.SI.end.cf
The bug fixed by this patch was one where the llvm.SI.end.cf intrinsic
was being inserted into the header of loops. This would happen when
an if block terminated in a loop header and we would end up with
code like this:
s_mov_b64 exec, 0x3 ; Initial exec mask
s_mov_b64 s[0:1], exec ; Saved exec mask
v_cmpx_gt_u32 exec, s[2:3], v0, 0 ; llvm.SI.if
do_stuff()
LOOP: ; Start of loop header
s_or_b64 exec, exec, s[0:1] ; llvm.SI.end.cf <-BUG: The exec mask has the
same value at the beginning of each loop
iteration.
do_stuff();
s_cbranch_execnz LOOP
The fix is to create a new basic block before the loop and insert the
llvm.SI.end.cf there. This way the exec mask is restored before the
start of the loop instead of at the beginning of each iteration.
llvm-svn: 228302
2015-02-05 16:32:15 +01:00
|
|
|
}
|
2017-03-15 19:00:12 +01:00
|
|
|
|
2018-08-22 01:32:03 +02:00
|
|
|
BB = SplitBlockPredecessors(BB, Preds, "endcf.split", DT, LI, nullptr,
|
|
|
|
false);
|
R600/SI: Fix bug from insertion of llvm.SI.end.cf into loop headers
The llvm.SI.end.cf intrinsic is used to mark the end of if-then blocks,
if-then-else blocks, and loops. It is responsible for updating the
exec mask to re-enable threads that had been masked during the preceding
control flow block. For example:
s_mov_b64 exec, 0x3 ; Initial exec mask
s_mov_b64 s[0:1], exec ; Saved exec mask
v_cmpx_gt_u32 exec, s[2:3], v0, 0 ; llvm.SI.if
do_stuff()
s_or_b64 exec, exec, s[0:1] ; llvm.SI.end.cf
The bug fixed by this patch was one where the llvm.SI.end.cf intrinsic
was being inserted into the header of loops. This would happen when
an if block terminated in a loop header and we would end up with
code like this:
s_mov_b64 exec, 0x3 ; Initial exec mask
s_mov_b64 s[0:1], exec ; Saved exec mask
v_cmpx_gt_u32 exec, s[2:3], v0, 0 ; llvm.SI.if
do_stuff()
LOOP: ; Start of loop header
s_or_b64 exec, exec, s[0:1] ; llvm.SI.end.cf <-BUG: The exec mask has the
same value at the beginning of each loop
iteration.
do_stuff();
s_cbranch_execnz LOOP
The fix is to create a new basic block before the loop and insert the
llvm.SI.end.cf there. This way the exec mask is restored before the
start of the loop instead of at the beginning of each iteration.
llvm-svn: 228302
2015-02-05 16:32:15 +01:00
|
|
|
}
|
|
|
|
|
2016-02-13 00:45:29 +01:00
|
|
|
Value *Exec = popSaved();
|
2017-03-08 00:29:36 +01:00
|
|
|
Instruction *FirstInsertionPt = &*BB->getFirstInsertionPt();
|
|
|
|
if (!isa<UndefValue>(Exec) && !isa<UnreachableInst>(FirstInsertionPt))
|
|
|
|
CallInst::Create(EndCf, Exec, "", FirstInsertionPt);
|
2012-12-19 23:10:31 +01:00
|
|
|
}
|
|
|
|
|
2018-05-01 17:54:18 +02:00
|
|
|
/// Annotate the control flow with intrinsics so the backend can
|
2012-12-19 23:10:31 +01:00
|
|
|
/// recognize if/then/else and loops.
|
|
|
|
bool SIAnnotateControlFlow::runOnFunction(Function &F) {
|
2014-01-13 14:07:17 +01:00
|
|
|
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
|
R600/SI: Fix bug from insertion of llvm.SI.end.cf into loop headers
The llvm.SI.end.cf intrinsic is used to mark the end of if-then blocks,
if-then-else blocks, and loops. It is responsible for updating the
exec mask to re-enable threads that had been masked during the preceding
control flow block. For example:
s_mov_b64 exec, 0x3 ; Initial exec mask
s_mov_b64 s[0:1], exec ; Saved exec mask
v_cmpx_gt_u32 exec, s[2:3], v0, 0 ; llvm.SI.if
do_stuff()
s_or_b64 exec, exec, s[0:1] ; llvm.SI.end.cf
The bug fixed by this patch was one where the llvm.SI.end.cf intrinsic
was being inserted into the header of loops. This would happen when
an if block terminated in a loop header and we would end up with
code like this:
s_mov_b64 exec, 0x3 ; Initial exec mask
s_mov_b64 s[0:1], exec ; Saved exec mask
v_cmpx_gt_u32 exec, s[2:3], v0, 0 ; llvm.SI.if
do_stuff()
LOOP: ; Start of loop header
s_or_b64 exec, exec, s[0:1] ; llvm.SI.end.cf <-BUG: The exec mask has the
same value at the beginning of each loop
iteration.
do_stuff();
s_cbranch_execnz LOOP
The fix is to create a new basic block before the loop and insert the
llvm.SI.end.cf there. This way the exec mask is restored before the
start of the loop instead of at the beginning of each iteration.
llvm-svn: 228302
2015-02-05 16:32:15 +01:00
|
|
|
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
|
2018-08-30 16:21:36 +02:00
|
|
|
DA = &getAnalysis<LegacyDivergenceAnalysis>();
|
2019-06-14 01:47:36 +02:00
|
|
|
TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
|
|
|
|
const TargetMachine &TM = TPC.getTM<TargetMachine>();
|
|
|
|
|
|
|
|
initialize(*F.getParent(), TM.getSubtarget<GCNSubtarget>(F));
|
2012-12-19 23:10:31 +01:00
|
|
|
|
|
|
|
for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()),
|
|
|
|
E = df_end(&F.getEntryBlock()); I != E; ++I) {
|
2017-03-15 19:00:12 +01:00
|
|
|
BasicBlock *BB = *I;
|
|
|
|
BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator());
|
2012-12-19 23:10:31 +01:00
|
|
|
|
|
|
|
if (!Term || Term->isUnconditional()) {
|
2017-03-15 19:00:12 +01:00
|
|
|
if (isTopOfStack(BB))
|
|
|
|
closeControlFlow(BB);
|
2016-02-13 00:45:29 +01:00
|
|
|
|
2012-12-19 23:10:31 +01:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (I.nodeVisited(Term->getSuccessor(1))) {
|
2017-03-15 19:00:12 +01:00
|
|
|
if (isTopOfStack(BB))
|
|
|
|
closeControlFlow(BB);
|
2016-02-13 00:45:29 +01:00
|
|
|
|
2012-12-19 23:10:31 +01:00
|
|
|
handleLoop(Term);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2017-03-15 19:00:12 +01:00
|
|
|
if (isTopOfStack(BB)) {
|
2012-12-19 23:10:31 +01:00
|
|
|
PHINode *Phi = dyn_cast<PHINode>(Term->getCondition());
|
2017-03-15 19:00:12 +01:00
|
|
|
if (Phi && Phi->getParent() == BB && isElse(Phi)) {
|
2012-12-19 23:10:31 +01:00
|
|
|
insertElse(Term);
|
|
|
|
eraseIfUnused(Phi);
|
|
|
|
continue;
|
|
|
|
}
|
2017-03-15 19:00:12 +01:00
|
|
|
|
|
|
|
closeControlFlow(BB);
|
2012-12-19 23:10:31 +01:00
|
|
|
}
|
2017-03-15 19:00:12 +01:00
|
|
|
|
2012-12-19 23:10:31 +01:00
|
|
|
openIf(Term);
|
|
|
|
}
|
|
|
|
|
2018-01-17 17:30:01 +01:00
|
|
|
if (!Stack.empty()) {
|
|
|
|
// CFG was probably not structured.
|
|
|
|
report_fatal_error("failed to annotate CFG");
|
|
|
|
}
|
|
|
|
|
2012-12-19 23:10:31 +01:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2018-05-01 17:54:18 +02:00
|
|
|
/// Create the annotation pass
|
2012-12-19 23:10:31 +01:00
|
|
|
FunctionPass *llvm::createSIAnnotateControlFlowPass() {
|
|
|
|
return new SIAnnotateControlFlow();
|
|
|
|
}
|