mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
afd08c7313
Summary: ...loop after the last iteration. This is really hard to do correctly. The core problem is that we need to model liveness through the induction PHIs from iteration to iteration in order to get the correct results, and we need to correctly de-duplicate the common subgraphs of instructions feeding some subset of the induction PHIs. All of this can be driven either from a side effect at some iteration or from the loop values used after the loop finishes. This patch implements this by storing the forward-propagating analysis of each instruction in a cache to recall whether it was free and whether it has become live and thus counted toward the total unroll cost. Then, at each sink for a value in the loop, we recursively walk back through every value that feeds the sink, including looping back through the iterations as needed, until we have marked the entire input graph as live. Because we cache this, we never visit instructions more than twice -- once when we analyze them and put them into the cache, and once when we count their cost towards the unrolled loop. Also, because the cache is only two bits and because we are dealing with relatively small iteration counts, we can store all of this very densely in memory to avoid this from becoming an excessively slow analysis. The code here is still pretty gross. I would appreciate suggestions about better ways to factor or split this up, I've stared too long at the algorithmic side to really have a good sense of what the design should probably look at. Also, it might seem like we should do all of this bottom-up, but I think that is a red herring. Specifically, the simplification power is *much* greater working top-down. We can forward propagate very effectively, even across strange and interesting recurrances around the backedge. Because we use data to propagate, this doesn't cause a state space explosion. Doing this level of constant folding, etc, would be very expensive to do bottom-up because it wouldn't be until the last moment that you could collapse everything. The current solution is essentially a top-down simplification with a bottom-up cost accounting which seems to get the best of both worlds. It makes the simplification incremental and powerful while leaving everything dead until we *know* it is needed. Finally, a core property of this approach is its *monotonicity*. At all times, the current UnrolledCost is a conservatively low estimate. This ensures that we will never early-exit from the analysis due to exceeding a threshold when if we had continued, the cost would have gone back below the threshold. These kinds of bugs can cause incredibly hard to track down random changes to behavior. We could use a techinque similar (but much simpler) within the inliner as well to avoid considering speculated code in the inline cost. Reviewers: chandlerc Subscribers: sanjoy, mzolotukhin, llvm-commits Differential Revision: http://reviews.llvm.org/D11758 llvm-svn: 269388
332 lines
12 KiB
C++
332 lines
12 KiB
C++
//===- UnrollAnalyzerTest.cpp - UnrollAnalyzer unit tests -----------------===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/AsmParser/Parser.h"
|
|
#include "llvm/IR/LegacyPassManager.h"
|
|
#include "llvm/Support/SourceMgr.h"
|
|
#include "llvm/Analysis/LoopUnrollAnalyzer.h"
|
|
#include "llvm/IR/Dominators.h"
|
|
#include "gtest/gtest.h"
|
|
|
|
using namespace llvm;
|
|
namespace llvm {
|
|
void initializeUnrollAnalyzerTestPass(PassRegistry &);
|
|
|
|
static SmallVector<DenseMap<Value *, Constant *>, 16> SimplifiedValuesVector;
|
|
static unsigned TripCount = 0;
|
|
|
|
namespace {
|
|
struct UnrollAnalyzerTest : public FunctionPass {
|
|
static char ID;
|
|
bool runOnFunction(Function &F) override {
|
|
LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
|
|
ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
|
|
|
|
Function::iterator FI = F.begin();
|
|
FI++; // First basic block is entry - skip it.
|
|
BasicBlock *Header = &*FI++;
|
|
Loop *L = LI->getLoopFor(Header);
|
|
BasicBlock *Exiting = L->getExitingBlock();
|
|
|
|
SimplifiedValuesVector.clear();
|
|
TripCount = SE->getSmallConstantTripCount(L, Exiting);
|
|
for (unsigned Iteration = 0; Iteration < TripCount; Iteration++) {
|
|
DenseMap<Value *, Constant *> SimplifiedValues;
|
|
UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, *SE, L);
|
|
for (auto *BB : L->getBlocks())
|
|
for (Instruction &I : *BB)
|
|
Analyzer.visit(I);
|
|
SimplifiedValuesVector.push_back(SimplifiedValues);
|
|
}
|
|
return false;
|
|
}
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
|
AU.addRequired<DominatorTreeWrapperPass>();
|
|
AU.addRequired<LoopInfoWrapperPass>();
|
|
AU.addRequired<ScalarEvolutionWrapperPass>();
|
|
AU.setPreservesAll();
|
|
}
|
|
UnrollAnalyzerTest() : FunctionPass(ID) {
|
|
initializeUnrollAnalyzerTestPass(*PassRegistry::getPassRegistry());
|
|
}
|
|
};
|
|
}
|
|
|
|
char UnrollAnalyzerTest::ID = 0;
|
|
|
|
std::unique_ptr<Module> makeLLVMModule(LLVMContext &Context,
|
|
UnrollAnalyzerTest *P,
|
|
const char *ModuleStr) {
|
|
SMDiagnostic Err;
|
|
return parseAssemblyString(ModuleStr, Err, Context);
|
|
}
|
|
|
|
TEST(UnrollAnalyzerTest, BasicSimplifications) {
|
|
const char *ModuleStr =
|
|
"target datalayout = \"e-m:o-i64:64-f80:128-n8:16:32:64-S128\"\n"
|
|
"define i64 @propagate_loop_phis() {\n"
|
|
"entry:\n"
|
|
" br label %loop\n"
|
|
"loop:\n"
|
|
" %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]\n"
|
|
" %x0 = phi i64 [ 0, %entry ], [ %x2, %loop ]\n"
|
|
" %x1 = or i64 %x0, 1\n"
|
|
" %x2 = or i64 %x1, 2\n"
|
|
" %inc = add nuw nsw i64 %iv, 1\n"
|
|
" %cond = icmp sge i64 %inc, 8\n"
|
|
" br i1 %cond, label %loop.end, label %loop\n"
|
|
"loop.end:\n"
|
|
" %x.lcssa = phi i64 [ %x2, %loop ]\n"
|
|
" ret i64 %x.lcssa\n"
|
|
"}\n";
|
|
UnrollAnalyzerTest *P = new UnrollAnalyzerTest();
|
|
LLVMContext Context;
|
|
std::unique_ptr<Module> M = makeLLVMModule(Context, P, ModuleStr);
|
|
legacy::PassManager Passes;
|
|
Passes.add(P);
|
|
Passes.run(*M);
|
|
|
|
// Perform checks
|
|
Module::iterator MI = M->begin();
|
|
Function *F = &*MI++;
|
|
Function::iterator FI = F->begin();
|
|
FI++; // First basic block is entry - skip it.
|
|
BasicBlock *Header = &*FI++;
|
|
|
|
BasicBlock::iterator BBI = Header->begin();
|
|
std::advance(BBI, 4);
|
|
Instruction *Y1 = &*BBI++;
|
|
Instruction *Y2 = &*BBI++;
|
|
// Check simplification expected on the 1st iteration.
|
|
// Check that "%inc = add nuw nsw i64 %iv, 1" is simplified to 1
|
|
auto I1 = SimplifiedValuesVector[0].find(Y1);
|
|
EXPECT_TRUE(I1 != SimplifiedValuesVector[0].end());
|
|
EXPECT_EQ(cast<ConstantInt>((*I1).second)->getZExtValue(), 1U);
|
|
|
|
// Check that "%cond = icmp sge i64 %inc, 10" is simplified to false
|
|
auto I2 = SimplifiedValuesVector[0].find(Y2);
|
|
EXPECT_TRUE(I2 != SimplifiedValuesVector[0].end());
|
|
EXPECT_FALSE(cast<ConstantInt>((*I2).second)->getZExtValue());
|
|
|
|
// Check simplification expected on the last iteration.
|
|
// Check that "%inc = add nuw nsw i64 %iv, 1" is simplified to 8
|
|
I1 = SimplifiedValuesVector[TripCount - 1].find(Y1);
|
|
EXPECT_TRUE(I1 != SimplifiedValuesVector[TripCount - 1].end());
|
|
EXPECT_EQ(cast<ConstantInt>((*I1).second)->getZExtValue(), TripCount);
|
|
|
|
// Check that "%cond = icmp sge i64 %inc, 10" is simplified to false
|
|
I2 = SimplifiedValuesVector[TripCount - 1].find(Y2);
|
|
EXPECT_TRUE(I2 != SimplifiedValuesVector[TripCount - 1].end());
|
|
EXPECT_TRUE(cast<ConstantInt>((*I2).second)->getZExtValue());
|
|
}
|
|
|
|
TEST(UnrollAnalyzerTest, OuterLoopSimplification) {
|
|
const char *ModuleStr =
|
|
"target datalayout = \"e-m:o-i64:64-f80:128-n8:16:32:64-S128\"\n"
|
|
"define void @foo() {\n"
|
|
"entry:\n"
|
|
" br label %outer.loop\n"
|
|
"outer.loop:\n"
|
|
" %iv.outer = phi i64 [ 0, %entry ], [ %iv.outer.next, %outer.loop.latch ]\n"
|
|
" %iv.outer.next = add nuw nsw i64 %iv.outer, 1\n"
|
|
" br label %inner.loop\n"
|
|
"inner.loop:\n"
|
|
" %iv.inner = phi i64 [ 0, %outer.loop ], [ %iv.inner.next, %inner.loop ]\n"
|
|
" %iv.inner.next = add nuw nsw i64 %iv.inner, 1\n"
|
|
" %exitcond.inner = icmp eq i64 %iv.inner.next, 1000\n"
|
|
" br i1 %exitcond.inner, label %outer.loop.latch, label %inner.loop\n"
|
|
"outer.loop.latch:\n"
|
|
" %exitcond.outer = icmp eq i64 %iv.outer.next, 40\n"
|
|
" br i1 %exitcond.outer, label %exit, label %outer.loop\n"
|
|
"exit:\n"
|
|
" ret void\n"
|
|
"}\n";
|
|
|
|
UnrollAnalyzerTest *P = new UnrollAnalyzerTest();
|
|
LLVMContext Context;
|
|
std::unique_ptr<Module> M = makeLLVMModule(Context, P, ModuleStr);
|
|
legacy::PassManager Passes;
|
|
Passes.add(P);
|
|
Passes.run(*M);
|
|
|
|
Module::iterator MI = M->begin();
|
|
Function *F = &*MI++;
|
|
Function::iterator FI = F->begin();
|
|
FI++;
|
|
BasicBlock *Header = &*FI++;
|
|
BasicBlock *InnerBody = &*FI++;
|
|
|
|
BasicBlock::iterator BBI = Header->begin();
|
|
BBI++;
|
|
Instruction *Y1 = &*BBI;
|
|
BBI = InnerBody->begin();
|
|
BBI++;
|
|
Instruction *Y2 = &*BBI;
|
|
// Check that we can simplify IV of the outer loop, but can't simplify the IV
|
|
// of the inner loop if we only know the iteration number of the outer loop.
|
|
//
|
|
// Y1 is %iv.outer.next, Y2 is %iv.inner.next
|
|
auto I1 = SimplifiedValuesVector[0].find(Y1);
|
|
EXPECT_TRUE(I1 != SimplifiedValuesVector[0].end());
|
|
auto I2 = SimplifiedValuesVector[0].find(Y2);
|
|
EXPECT_TRUE(I2 == SimplifiedValuesVector[0].end());
|
|
}
|
|
TEST(UnrollAnalyzerTest, CmpSimplifications) {
|
|
const char *ModuleStr =
|
|
"target datalayout = \"e-m:o-i64:64-f80:128-n8:16:32:64-S128\"\n"
|
|
"define void @branch_iv_trunc() {\n"
|
|
"entry:\n"
|
|
" br label %for.body\n"
|
|
"for.body:\n"
|
|
" %indvars.iv = phi i64 [ 0, %entry ], [ %tmp3, %for.body ]\n"
|
|
" %tmp2 = trunc i64 %indvars.iv to i32\n"
|
|
" %cmp3 = icmp eq i32 %tmp2, 5\n"
|
|
" %tmp3 = add nuw nsw i64 %indvars.iv, 1\n"
|
|
" %exitcond = icmp eq i64 %tmp3, 10\n"
|
|
" br i1 %exitcond, label %for.end, label %for.body\n"
|
|
"for.end:\n"
|
|
" ret void\n"
|
|
"}\n";
|
|
UnrollAnalyzerTest *P = new UnrollAnalyzerTest();
|
|
LLVMContext Context;
|
|
std::unique_ptr<Module> M = makeLLVMModule(Context, P, ModuleStr);
|
|
legacy::PassManager Passes;
|
|
Passes.add(P);
|
|
Passes.run(*M);
|
|
|
|
// Perform checks
|
|
Module::iterator MI = M->begin();
|
|
Function *F = &*MI++;
|
|
Function::iterator FI = F->begin();
|
|
FI++; // First basic block is entry - skip it.
|
|
BasicBlock *Header = &*FI++;
|
|
|
|
BasicBlock::iterator BBI = Header->begin();
|
|
BBI++;
|
|
Instruction *Y1 = &*BBI++;
|
|
Instruction *Y2 = &*BBI++;
|
|
// Check simplification expected on the 5th iteration.
|
|
// Check that "%tmp2 = trunc i64 %indvars.iv to i32" is simplified to 5
|
|
// and "%cmp3 = icmp eq i32 %tmp2, 5" is simplified to 1 (i.e. true).
|
|
auto I1 = SimplifiedValuesVector[5].find(Y1);
|
|
EXPECT_TRUE(I1 != SimplifiedValuesVector[5].end());
|
|
EXPECT_EQ(cast<ConstantInt>((*I1).second)->getZExtValue(), 5U);
|
|
auto I2 = SimplifiedValuesVector[5].find(Y2);
|
|
EXPECT_TRUE(I2 != SimplifiedValuesVector[5].end());
|
|
EXPECT_EQ(cast<ConstantInt>((*I2).second)->getZExtValue(), 1U);
|
|
}
|
|
TEST(UnrollAnalyzerTest, PtrCmpSimplifications) {
|
|
const char *ModuleStr =
|
|
"target datalayout = \"e-m:o-i64:64-f80:128-n8:16:32:64-S128\"\n"
|
|
"define void @ptr_cmp(i8 *%a) {\n"
|
|
"entry:\n"
|
|
" %limit = getelementptr i8, i8* %a, i64 40\n"
|
|
" %start.iv2 = getelementptr i8, i8* %a, i64 7\n"
|
|
" br label %loop.body\n"
|
|
"loop.body:\n"
|
|
" %iv.0 = phi i8* [ %a, %entry ], [ %iv.1, %loop.body ]\n"
|
|
" %iv2.0 = phi i8* [ %start.iv2, %entry ], [ %iv2.1, %loop.body ]\n"
|
|
" %cmp = icmp eq i8* %iv2.0, %iv.0\n"
|
|
" %iv.1 = getelementptr inbounds i8, i8* %iv.0, i64 1\n"
|
|
" %iv2.1 = getelementptr inbounds i8, i8* %iv2.0, i64 1\n"
|
|
" %exitcond = icmp ne i8* %iv.1, %limit\n"
|
|
" br i1 %exitcond, label %loop.body, label %loop.exit\n"
|
|
"loop.exit:\n"
|
|
" ret void\n"
|
|
"}\n";
|
|
UnrollAnalyzerTest *P = new UnrollAnalyzerTest();
|
|
LLVMContext Context;
|
|
std::unique_ptr<Module> M = makeLLVMModule(Context, P, ModuleStr);
|
|
legacy::PassManager Passes;
|
|
Passes.add(P);
|
|
Passes.run(*M);
|
|
|
|
// Perform checks
|
|
Module::iterator MI = M->begin();
|
|
Function *F = &*MI++;
|
|
Function::iterator FI = F->begin();
|
|
FI++; // First basic block is entry - skip it.
|
|
BasicBlock *Header = &*FI;
|
|
|
|
BasicBlock::iterator BBI = Header->begin();
|
|
std::advance(BBI, 2);
|
|
Instruction *Y1 = &*BBI;
|
|
// Check simplification expected on the 5th iteration.
|
|
// Check that "%cmp = icmp eq i8* %iv2.0, %iv.0" is simplified to 0.
|
|
auto I1 = SimplifiedValuesVector[5].find(Y1);
|
|
EXPECT_TRUE(I1 != SimplifiedValuesVector[5].end());
|
|
EXPECT_EQ(cast<ConstantInt>((*I1).second)->getZExtValue(), 0U);
|
|
}
|
|
TEST(UnrollAnalyzerTest, CastSimplifications) {
|
|
const char *ModuleStr =
|
|
"target datalayout = \"e-m:o-i64:64-f80:128-n8:16:32:64-S128\"\n"
|
|
"@known_constant = internal unnamed_addr constant [10 x i32] [i32 0, i32 1, i32 0, i32 1, i32 0, i32 259, i32 0, i32 1, i32 0, i32 1], align 16\n"
|
|
"define void @const_load_cast() {\n"
|
|
"entry:\n"
|
|
" br label %loop\n"
|
|
"\n"
|
|
"loop:\n"
|
|
" %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]\n"
|
|
" %array_const_idx = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv\n"
|
|
" %const_array_element = load i32, i32* %array_const_idx, align 4\n"
|
|
" %se = sext i32 %const_array_element to i64\n"
|
|
" %ze = zext i32 %const_array_element to i64\n"
|
|
" %tr = trunc i32 %const_array_element to i8\n"
|
|
" %inc = add nuw nsw i64 %iv, 1\n"
|
|
" %exitcond86.i = icmp eq i64 %inc, 10\n"
|
|
" br i1 %exitcond86.i, label %loop.end, label %loop\n"
|
|
"\n"
|
|
"loop.end:\n"
|
|
" ret void\n"
|
|
"}\n";
|
|
|
|
UnrollAnalyzerTest *P = new UnrollAnalyzerTest();
|
|
LLVMContext Context;
|
|
std::unique_ptr<Module> M = makeLLVMModule(Context, P, ModuleStr);
|
|
legacy::PassManager Passes;
|
|
Passes.add(P);
|
|
Passes.run(*M);
|
|
|
|
// Perform checks
|
|
Module::iterator MI = M->begin();
|
|
Function *F = &*MI++;
|
|
Function::iterator FI = F->begin();
|
|
FI++; // First basic block is entry - skip it.
|
|
BasicBlock *Header = &*FI++;
|
|
|
|
BasicBlock::iterator BBI = Header->begin();
|
|
std::advance(BBI, 3);
|
|
Instruction *Y1 = &*BBI++;
|
|
Instruction *Y2 = &*BBI++;
|
|
Instruction *Y3 = &*BBI++;
|
|
// Check simplification expected on the 5th iteration.
|
|
// "%se = sext i32 %const_array_element to i64" should be simplified to 259,
|
|
// "%ze = zext i32 %const_array_element to i64" should be simplified to 259,
|
|
// "%tr = trunc i32 %const_array_element to i8" should be simplified to 3.
|
|
auto I1 = SimplifiedValuesVector[5].find(Y1);
|
|
EXPECT_TRUE(I1 != SimplifiedValuesVector[5].end());
|
|
EXPECT_EQ(cast<ConstantInt>((*I1).second)->getZExtValue(), 259U);
|
|
auto I2 = SimplifiedValuesVector[5].find(Y2);
|
|
EXPECT_TRUE(I2 != SimplifiedValuesVector[5].end());
|
|
EXPECT_EQ(cast<ConstantInt>((*I2).second)->getZExtValue(), 259U);
|
|
auto I3 = SimplifiedValuesVector[5].find(Y3);
|
|
EXPECT_TRUE(I3 != SimplifiedValuesVector[5].end());
|
|
EXPECT_EQ(cast<ConstantInt>((*I3).second)->getZExtValue(), 3U);
|
|
}
|
|
|
|
} // end namespace llvm
|
|
|
|
INITIALIZE_PASS_BEGIN(UnrollAnalyzerTest, "unrollanalyzertestpass",
|
|
"unrollanalyzertestpass", false, false)
|
|
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
|
|
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
|
|
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
|
|
INITIALIZE_PASS_END(UnrollAnalyzerTest, "unrollanalyzertestpass",
|
|
"unrollanalyzertestpass", false, false)
|