
Revert "[JumpThreading] Thread through guards"

This reverts commit r294617.

We hit an assertion failure while trying to get the condition of an
unconditional branch.

llvm-svn: 295200
Anna Thomas 2017-02-15 17:08:29 +00:00
parent 5bc87e6237
commit 08063ea857
6 changed files with 14 additions and 382 deletions
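
For context on the failure mentioned in the commit message: BranchInst::getCondition() asserts isConditional(), so any path that casts a terminator to BranchInst and then reads its condition must filter out unconditional branches first. A minimal C++ sketch of that precondition (the helper name is hypothetical and not part of this commit):

#include "llvm/IR/Instructions.h"
#include "llvm/IR/Value.h"

using namespace llvm;

// Hypothetical helper: reading the condition is only legal for conditional
// branches; an unconditional `br label %dest` has no condition operand and
// BranchInst::getCondition() asserts in that case.
static Value *getConditionOrNull(BranchInst *BI) {
  if (!BI->isConditional())
    return nullptr;
  return BI->getCondition();
}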


@@ -23,7 +23,6 @@
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/ValueHandle.h"
namespace llvm {
@@ -63,7 +62,6 @@ class JumpThreadingPass : public PassInfoMixin<JumpThreadingPass> {
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
bool HasProfileData = false;
bool HasGuards = false;
#ifdef NDEBUG
SmallPtrSet<const BasicBlock *, 16> LoopHeaders;
#else
@@ -124,9 +122,6 @@ public:
bool TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB);
bool TryToUnfoldSelectInCurrBB(BasicBlock *BB);
bool ProcessGuards(BasicBlock *BB);
bool ThreadGuard(BasicBlock *BB, IntrinsicInst *Guard, BranchInst *BI);
private:
BasicBlock *SplitBlockPreds(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
const char *Suffix);


@@ -245,16 +245,6 @@ Loop *cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
void remapInstructionsInBlocks(const SmallVectorImpl<BasicBlock *> &Blocks,
ValueToValueMapTy &VMap);
/// Split edge between BB and PredBB and duplicate all non-Phi instructions
/// from BB between its beginning and the StopAt instruction into the split
/// block. Phi nodes are not duplicated, but their uses are handled correctly:
/// we replace them with the uses of corresponding Phi inputs. ValueMapping
/// is used to map the original instructions from BB to their newly-created
/// copies. Returns the split block.
BasicBlock *
DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB,
Instruction *StopAt,
ValueToValueMapTy &ValueMapping);
} // end namespace llvm
#endif // LLVM_TRANSFORMS_UTILS_CLONING_H
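
As a usage sketch for the declaration removed above (assuming the pre-revert API exactly as declared; the wrapper function below is hypothetical): the helper splits the PredBB -> BB edge, clones BB's non-PHI instructions up to StopAt into the new block, and records an original-to-clone mapping for later fixups.

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"

using namespace llvm;

// Hypothetical wrapper showing the removed helper's contract. After the call,
// VMap maps each instruction of BB that precedes StopAt to its clone in the
// returned split block; PHI nodes are not cloned but are resolved to their
// incoming values from PredBB.
static BasicBlock *clonePrefixIntoSplitBlock(BasicBlock *BB, BasicBlock *PredBB,
                                             Instruction *StopAt,
                                             ValueToValueMapTy &VMap) {
  return DuplicateInstructionsInSplitBetween(BB, PredBB, StopAt, VMap);
}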


@@ -30,13 +30,11 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <algorithm>
@@ -171,9 +169,6 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
// When profile data is available, we need to update edge weights after
// successful jump threading, which requires both BPI and BFI being available.
HasProfileData = HasProfileData_;
auto *GuardDecl = F.getParent()->getFunction(
Intrinsic::getName(Intrinsic::experimental_guard));
HasGuards = GuardDecl && !GuardDecl->use_empty();
if (HasProfileData) {
BPI = std::move(BPI_);
BFI = std::move(BFI_);
@@ -243,13 +238,10 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
return EverChanged;
}
/// Return the cost of duplicating a piece of this block from first non-phi
/// and before StopAt instruction to thread across it. Stop scanning the block
/// when exceeding the threshold. If duplication is impossible, returns ~0U.
static unsigned getJumpThreadDuplicationCost(BasicBlock *BB,
Instruction *StopAt,
/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to
/// thread across it. Stop scanning the block when passing the threshold.
static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
unsigned Threshold) {
assert(StopAt->getParent() == BB && "Not an instruction from proper BB?");
/// Ignore PHI nodes, these will be flattened when duplication happens.
BasicBlock::const_iterator I(BB->getFirstNonPHI());
@@ -257,17 +249,15 @@ static unsigned getJumpThreadDuplicationCost(BasicBlock *BB,
// branch, so they shouldn't count against the duplication cost.
unsigned Bonus = 0;
if (BB->getTerminator() == StopAt) {
// Threading through a switch statement is particularly profitable. If this
// block ends in a switch, decrease its cost to make it more likely to
// happen.
if (isa<SwitchInst>(StopAt))
Bonus = 6;
const TerminatorInst *BBTerm = BB->getTerminator();
// Threading through a switch statement is particularly profitable. If this
// block ends in a switch, decrease its cost to make it more likely to happen.
if (isa<SwitchInst>(BBTerm))
Bonus = 6;
// The same holds for indirect branches, but slightly more so.
if (isa<IndirectBrInst>(StopAt))
Bonus = 8;
}
// The same holds for indirect branches, but slightly more so.
if (isa<IndirectBrInst>(BBTerm))
Bonus = 8;
// Bump the threshold up so the early exit from the loop doesn't skip the
// terminator-based Size adjustment at the end.
@@ -276,7 +266,7 @@ static unsigned getJumpThreadDuplicationCost(BasicBlock *BB,
// Sum up the cost of each instruction until we get to the terminator. Don't
// include the terminator because the copy won't include it.
unsigned Size = 0;
for (; &*I != StopAt; ++I) {
for (; !isa<TerminatorInst>(I); ++I) {
// Stop scanning the block if we've reached the threshold.
if (Size > Threshold)
@@ -722,10 +712,6 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
if (TryToUnfoldSelectInCurrBB(BB))
return true;
// Look if we can propagate guards to predecessors.
if (HasGuards && ProcessGuards(BB))
return true;
// What kind of constant we're looking for.
ConstantPreference Preference = WantInteger;
@@ -1480,8 +1466,7 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
return false;
}
unsigned JumpThreadCost =
getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB, BBDupThreshold);
if (JumpThreadCost > BBDupThreshold) {
DEBUG(dbgs() << " Not threading BB '" << BB->getName()
<< "' - Cost is too high: " << JumpThreadCost << "\n");
@@ -1769,8 +1754,7 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred(
return false;
}
unsigned DuplicationCost =
getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
unsigned DuplicationCost = getJumpThreadDuplicationCost(BB, BBDupThreshold);
if (DuplicationCost > BBDupThreshold) {
DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
<< "' - Cost is too high: " << DuplicationCost << "\n");
@@ -2035,124 +2019,3 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
return false;
}
/// Try to propagate a guard from the current BB into one of its predecessors
/// in case if another branch of execution implies that the condition of this
/// guard is always true. Currently we only process the simplest case that
/// looks like:
///
/// Start:
/// %cond = ...
/// br i1 %cond, label %T1, label %F1
/// T1:
/// br label %Merge
/// F1:
/// br label %Merge
/// Merge:
/// %condGuard = ...
/// call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ]
///
/// And cond either implies condGuard or !condGuard. In this case all the
/// instructions before the guard can be duplicated in both branches, and the
/// guard is then threaded to one of them.
bool JumpThreadingPass::ProcessGuards(BasicBlock *BB) {
using namespace PatternMatch;
// We only want to deal with two predecessors.
BasicBlock *Pred1, *Pred2;
auto PI = pred_begin(BB), PE = pred_end(BB);
if (PI == PE)
return false;
Pred1 = *PI++;
if (PI == PE)
return false;
Pred2 = *PI++;
if (PI != PE)
return false;
// Try to thread one of the guards of the block.
// TODO: Look up deeper than to immediate predecessor?
if (auto Parent = Pred1->getSinglePredecessor())
if (Parent == Pred2->getSinglePredecessor())
if (BranchInst *BI = dyn_cast<BranchInst>(Parent->getTerminator()))
for (auto &I : *BB)
if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>()))
if (ThreadGuard(BB, cast<IntrinsicInst>(&I), BI))
return true;
return false;
}
/// Try to propagate the guard from BB which is the lower block of a diamond
/// to one of its branches, in case if diamond's condition implies guard's
/// condition.
bool JumpThreadingPass::ThreadGuard(BasicBlock *BB, IntrinsicInst *Guard,
BranchInst *BI) {
Value *GuardCond = Guard->getArgOperand(0);
Value *BranchCond = BI->getCondition();
BasicBlock *TrueDest = BI->getSuccessor(0);
BasicBlock *FalseDest = BI->getSuccessor(1);
auto &DL = BB->getModule()->getDataLayout();
bool TrueDestIsSafe = false;
bool FalseDestIsSafe = false;
// True dest is safe if BranchCond => GuardCond.
auto Impl = isImpliedCondition(BranchCond, GuardCond, DL);
if (Impl && *Impl)
TrueDestIsSafe = true;
else {
// False dest is safe if !BranchCond => GuardCond.
Impl =
isImpliedCondition(BranchCond, GuardCond, DL, /* InvertAPred */ true);
if (Impl && *Impl)
FalseDestIsSafe = true;
}
if (!TrueDestIsSafe && !FalseDestIsSafe)
return false;
BasicBlock *UnguardedBlock = TrueDestIsSafe ? TrueDest : FalseDest;
BasicBlock *GuardedBlock = FalseDestIsSafe ? TrueDest : FalseDest;
ValueToValueMapTy UnguardedMapping, GuardedMapping;
Instruction *AfterGuard = Guard->getNextNode();
unsigned Cost = getJumpThreadDuplicationCost(BB, AfterGuard, BBDupThreshold);
if (Cost > BBDupThreshold)
return false;
// Duplicate all instructions before the guard and the guard itself to the
// branch where implication is not proved.
GuardedBlock = DuplicateInstructionsInSplitBetween(
BB, GuardedBlock, AfterGuard, GuardedMapping);
assert(GuardedBlock && "Could not create the guarded block?");
// Duplicate all instructions before the guard in the unguarded branch.
// Since we have successfully duplicated the guarded block and this block
// has fewer instructions, we expect it to succeed.
UnguardedBlock = DuplicateInstructionsInSplitBetween(BB, UnguardedBlock,
Guard, UnguardedMapping);
assert(UnguardedBlock && "Could not create the unguarded block?");
DEBUG(dbgs() << "Moved guard " << *Guard << " to block "
<< GuardedBlock->getName() << "\n");
// Some instructions before the guard may still have uses. For them, we need
// to create Phi nodes merging their copies in both guarded and unguarded
// branches. Those instructions that have no uses can be just removed.
SmallVector<Instruction *, 4> ToRemove;
for (auto BI = BB->begin(); &*BI != AfterGuard; ++BI)
if (!isa<PHINode>(&*BI))
ToRemove.push_back(&*BI);
Instruction *InsertionPoint = &*BB->getFirstInsertionPt();
assert(InsertionPoint && "Empty block?");
// Substitute with Phis & remove.
for (auto *Inst : reverse(ToRemove)) {
if (!Inst->use_empty()) {
PHINode *NewPN = PHINode::Create(Inst->getType(), 2);
NewPN->addIncoming(UnguardedMapping[Inst], UnguardedBlock);
NewPN->addIncoming(GuardedMapping[Inst], GuardedBlock);
NewPN->insertBefore(InsertionPoint);
Inst->replaceAllUsesWith(NewPN);
}
Inst->eraseFromParent();
}
return true;
}
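
The implication check above is the heart of the removed transform: the guard can be dropped on the successor whose branch condition already proves the guard condition. A small, self-contained sketch of that check (assumptions: the values mirror the first case in the deleted test below, @branch_implies_guard, where %a < 10 implies %a < 20; the standalone setup and names are illustrative only):

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("implied", Ctx);
  auto *I32 = Type::getInt32Ty(Ctx);
  auto *FT = FunctionType::get(Type::getVoidTy(Ctx), {I32}, false);
  Function *F = Function::Create(FT, Function::ExternalLinkage, "f", &M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
  IRBuilder<> B(BB);
  Value *A = &*F->arg_begin();
  Value *BranchCond = B.CreateICmpSLT(A, B.getInt32(10)); // %a < 10
  Value *GuardCond = B.CreateICmpSLT(A, B.getInt32(20));  // %a < 20
  B.CreateRetVoid();
  // BranchCond implies GuardCond, so the true successor of a branch on
  // BranchCond would not need the guard; this is expected to yield true.
  auto Impl = isImpliedCondition(BranchCond, GuardCond, M.getDataLayout());
  return (Impl && *Impl) ? 0 : 1;
}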


@@ -747,40 +747,3 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
return NewLoop;
}
/// \brief Duplicate non-Phi instructions from the beginning of block up to
/// StopAt instruction into a split block between BB and its predecessor.
BasicBlock *
llvm::DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB,
Instruction *StopAt,
ValueToValueMapTy &ValueMapping) {
// We are going to have to map operands from the original BB block to the new
// copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to
// account for entry from PredBB.
BasicBlock::iterator BI = BB->begin();
for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
BasicBlock *NewBB = SplitEdge(PredBB, BB);
NewBB->setName(PredBB->getName() + ".split");
Instruction *NewTerm = NewBB->getTerminator();
// Clone the non-phi instructions of BB into NewBB, keeping track of the
// mapping and using it to remap operands in the cloned instructions.
for (; StopAt != &*BI; ++BI) {
Instruction *New = BI->clone();
New->setName(BI->getName());
New->insertBefore(NewTerm);
ValueMapping[&*BI] = New;
// Remap operands to patch up intra-block references.
for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
auto I = ValueMapping.find(Inst);
if (I != ValueMapping.end())
New->setOperand(i, I->second);
}
}
return NewBB;
}


@@ -1,132 +0,0 @@
; RUN: opt < %s -jump-threading -dce -S | FileCheck %s
declare void @llvm.experimental.guard(i1, ...)
declare i32 @f1()
declare i32 @f2()
define i32 @branch_implies_guard(i32 %a) {
; CHECK-LABEL: @branch_implies_guard(
%cond = icmp slt i32 %a, 10
br i1 %cond, label %T1, label %F1
T1:
; CHECK: T1.split
; CHECK: %v1 = call i32 @f1()
; CHECK-NEXT: %retVal
; CHECK-NEXT: br label %Merge
%v1 = call i32 @f1()
br label %Merge
F1:
; CHECK: F1.split
; CHECK: %v2 = call i32 @f2()
; CHECK-NEXT: %retVal
; CHECK-NEXT: %condGuard
; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %condGuard
; CHECK-NEXT: br label %Merge
%v2 = call i32 @f2()
br label %Merge
Merge:
; CHECK: Merge
; CHECK-NOT: call void(i1, ...) @llvm.experimental.guard(
%retPhi = phi i32 [ %v1, %T1 ], [ %v2, %F1 ]
%retVal = add i32 %retPhi, 10
%condGuard = icmp slt i32 %a, 20
call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ]
ret i32 %retVal
}
define i32 @not_branch_implies_guard(i32 %a) {
; CHECK-LABEL: @not_branch_implies_guard(
%cond = icmp slt i32 %a, 20
br i1 %cond, label %T1, label %F1
T1:
; CHECK: T1.split:
; CHECK-NEXT: %v1 = call i32 @f1()
; CHECK-NEXT: %retVal
; CHECK-NEXT: %condGuard
; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %condGuard
; CHECK-NEXT: br label %Merge
%v1 = call i32 @f1()
br label %Merge
F1:
; CHECK: F1.split:
; CHECK-NEXT: %v2 = call i32 @f2()
; CHECK-NEXT: %retVal
; CHECK-NEXT: br label %Merge
%v2 = call i32 @f2()
br label %Merge
Merge:
; CHECK: Merge
; CHECK-NOT: call void(i1, ...) @llvm.experimental.guard(
%retPhi = phi i32 [ %v1, %T1 ], [ %v2, %F1 ]
%retVal = add i32 %retPhi, 10
%condGuard = icmp sgt i32 %a, 10
call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ]
ret i32 %retVal
}
define i32 @branch_overlaps_guard(i32 %a) {
; CHECK-LABEL: @branch_overlaps_guard(
%cond = icmp slt i32 %a, 20
br i1 %cond, label %T1, label %F1
T1:
; CHECK: T1:
; CHECK-NEXT: %v1 = call i32 @f1()
; CHECK-NEXT: br label %Merge
%v1 = call i32 @f1()
br label %Merge
F1:
; CHECK: F1:
; CHECK-NEXT: %v2 = call i32 @f2()
; CHECK-NEXT: br label %Merge
%v2 = call i32 @f2()
br label %Merge
Merge:
; CHECK: Merge
; CHECK: %condGuard = icmp slt i32 %a, 10
; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %condGuard) [ "deopt"() ]
%retPhi = phi i32 [ %v1, %T1 ], [ %v2, %F1 ]
%retVal = add i32 %retPhi, 10
%condGuard = icmp slt i32 %a, 10
call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ]
ret i32 %retVal
}
define i32 @branch_doesnt_overlap_guard(i32 %a) {
; CHECK-LABEL: @branch_doesnt_overlap_guard(
%cond = icmp slt i32 %a, 10
br i1 %cond, label %T1, label %F1
T1:
; CHECK: T1:
; CHECK-NEXT: %v1 = call i32 @f1()
; CHECK-NEXT: br label %Merge
%v1 = call i32 @f1()
br label %Merge
F1:
; CHECK: F1:
; CHECK-NEXT: %v2 = call i32 @f2()
; CHECK-NEXT: br label %Merge
%v2 = call i32 @f2()
br label %Merge
Merge:
; CHECK: Merge
; CHECK: %condGuard = icmp sgt i32 %a, 20
; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %condGuard) [ "deopt"() ]
%retPhi = phi i32 [ %v1, %T1 ], [ %v2, %F1 ]
%retVal = add i32 %retPhi, 10
%condGuard = icmp sgt i32 %a, 20
call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ]
ret i32 %retVal
}


@@ -201,53 +201,6 @@ TEST_F(CloneInstruction, CallingConvention) {
delete F2;
}
TEST_F(CloneInstruction, DuplicateInstructionsToSplit) {
Type *ArgTy1[] = {Type::getInt32PtrTy(context)};
FunctionType *FT = FunctionType::get(Type::getVoidTy(context), ArgTy1, false);
V = new Argument(Type::getInt32Ty(context));
Function *F = Function::Create(FT, Function::ExternalLinkage);
BasicBlock *BB1 = BasicBlock::Create(context, "", F);
IRBuilder<> Builder1(BB1);
BasicBlock *BB2 = BasicBlock::Create(context, "", F);
IRBuilder<> Builder2(BB2);
Builder1.CreateBr(BB2);
Instruction *AddInst = cast<Instruction>(Builder2.CreateAdd(V, V));
Instruction *MulInst = cast<Instruction>(Builder2.CreateMul(AddInst, V));
Instruction *SubInst = cast<Instruction>(Builder2.CreateSub(MulInst, V));
Builder2.CreateRetVoid();
ValueToValueMapTy Mapping;
auto Split = DuplicateInstructionsInSplitBetween(BB2, BB1, SubInst, Mapping);
EXPECT_TRUE(Split);
EXPECT_EQ(Mapping.size(), 2u);
EXPECT_TRUE(Mapping.find(AddInst) != Mapping.end());
EXPECT_TRUE(Mapping.find(MulInst) != Mapping.end());
auto AddSplit = dyn_cast<Instruction>(Mapping[AddInst]);
EXPECT_TRUE(AddSplit);
EXPECT_EQ(AddSplit->getOperand(0), V);
EXPECT_EQ(AddSplit->getOperand(1), V);
EXPECT_EQ(AddSplit->getParent(), Split);
auto MulSplit = dyn_cast<Instruction>(Mapping[MulInst]);
EXPECT_TRUE(MulSplit);
EXPECT_EQ(MulSplit->getOperand(0), AddSplit);
EXPECT_EQ(MulSplit->getOperand(1), V);
EXPECT_EQ(MulSplit->getParent(), Split);
EXPECT_EQ(AddSplit->getNextNode(), MulSplit);
EXPECT_EQ(MulSplit->getNextNode(), Split->getTerminator());
delete F;
}
class CloneFunc : public ::testing::Test {
protected:
void SetUp() override {