mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
[SeparateConstOffsetFromGEP] sext(a)+sext(b) => sext(a+b) when a+b can't sign-overflow.
Summary: This patch implements my promised optimization to reunites certain sexts from operands after we extract the constant offset. See the header comment of reuniteExts for its motivation. One key building block that enables this optimization is Bjarke's poison value analysis (D11212). That helps to prove "a +nsw b" can't overflow. Reviewers: broune Subscribers: jholewinski, sanjoy, llvm-commits Differential Revision: http://reviews.llvm.org/D12016 llvm-svn: 245003
This commit is contained in:
parent
fe6f1d651e
commit
200cc080ad
@ -156,6 +156,7 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Analysis/ScalarEvolution.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/Analysis/ValueTracking.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
@ -164,6 +165,7 @@
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/LLVMContext.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/IR/PatternMatch.h"
|
||||
#include "llvm/IR/Operator.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
@ -174,6 +176,7 @@
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
|
||||
using namespace llvm;
|
||||
using namespace llvm::PatternMatch;
|
||||
|
||||
static cl::opt<bool> DisableSeparateConstOffsetFromGEP(
|
||||
"disable-separate-const-offset-from-gep", cl::init(false),
|
||||
@ -319,6 +322,7 @@ public:
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.addRequired<DominatorTreeWrapperPass>();
|
||||
AU.addRequired<ScalarEvolution>();
|
||||
AU.addRequired<TargetTransformInfoWrapperPass>();
|
||||
AU.setPreservesCFG();
|
||||
}
|
||||
@ -373,15 +377,32 @@ private:
|
||||
///
|
||||
/// Verified in @i32_add in split-gep.ll
|
||||
bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP);
|
||||
/// Optimize sext(a)+sext(b) to sext(a+b) when a+b can't sign overflow.
|
||||
/// SeparateConstOffsetFromGEP distributes a sext to leaves before extracting
|
||||
/// the constant offset. After extraction, it becomes desirable to reunion the
|
||||
/// distributed sexts. For example,
|
||||
///
|
||||
/// &a[sext(i +nsw (j +nsw 5)]
|
||||
/// => distribute &a[sext(i) +nsw (sext(j) +nsw 5)]
|
||||
/// => constant extraction &a[sext(i) + sext(j)] + 5
|
||||
/// => reunion &a[sext(i +nsw j)] + 5
|
||||
bool reuniteExts(Function &F);
|
||||
/// A helper that reunites sexts in an instruction.
|
||||
bool reuniteExts(Instruction *I);
|
||||
/// Find the closest dominator of <Dominatee> that is equivalent to <Key>.
|
||||
Instruction *findClosestMatchingDominator(const SCEV *Key,
|
||||
Instruction *Dominatee);
|
||||
/// Verify F is free of dead code.
|
||||
void verifyNoDeadCode(Function &F);
|
||||
|
||||
const DataLayout *DL;
|
||||
const DominatorTree *DT;
|
||||
DominatorTree *DT;
|
||||
ScalarEvolution *SE;
|
||||
const TargetMachine *TM;
|
||||
/// Whether to lower a GEP with multiple indices into arithmetic operations or
|
||||
/// multiple GEPs with a single index.
|
||||
bool LowerGEP;
|
||||
DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingExprs;
|
||||
};
|
||||
} // anonymous namespace
|
||||
|
||||
@ -391,6 +412,7 @@ INITIALIZE_PASS_BEGIN(
|
||||
"Split GEPs to a variadic base and a constant offset for better CSE", false,
|
||||
false)
|
||||
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
|
||||
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
|
||||
INITIALIZE_PASS_END(
|
||||
SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
|
||||
@ -891,13 +913,13 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
|
||||
// Clear the inbounds attribute because the new index may be off-bound.
|
||||
// e.g.,
|
||||
//
|
||||
// b = add i64 a, 5
|
||||
// addr = gep inbounds float, float* p, i64 b
|
||||
// b = add i64 a, 5
|
||||
// addr = gep inbounds float, float* p, i64 b
|
||||
//
|
||||
// is transformed to:
|
||||
//
|
||||
// addr2 = gep float, float* p, i64 a ; inbounds removed
|
||||
// addr = gep inbounds float, float* addr2, i64 5
|
||||
// addr2 = gep float, float* p, i64 a ; inbounds removed
|
||||
// addr = gep inbounds float, float* addr2, i64 5
|
||||
//
|
||||
// If a is -4, although the old index b is in bounds, the new index a is
|
||||
// off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
|
||||
@ -1013,6 +1035,7 @@ bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) {
|
||||
return false;
|
||||
|
||||
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
|
||||
SE = &getAnalysis<ScalarEvolution>();
|
||||
|
||||
bool Changed = false;
|
||||
for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) {
|
||||
@ -1025,12 +1048,84 @@ bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) {
|
||||
}
|
||||
}
|
||||
|
||||
Changed |= reuniteExts(F);
|
||||
|
||||
if (VerifyNoDeadCode)
|
||||
verifyNoDeadCode(F);
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
||||
Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator(
|
||||
const SCEV *Key, Instruction *Dominatee) {
|
||||
auto Pos = DominatingExprs.find(Key);
|
||||
if (Pos == DominatingExprs.end())
|
||||
return nullptr;
|
||||
|
||||
auto &Candidates = Pos->second;
|
||||
// Because we process the basic blocks in pre-order of the dominator tree, a
|
||||
// candidate that doesn't dominate the current instruction won't dominate any
|
||||
// future instruction either. Therefore, we pop it out of the stack. This
|
||||
// optimization makes the algorithm O(n).
|
||||
while (!Candidates.empty()) {
|
||||
Instruction *Candidate = Candidates.back();
|
||||
if (DT->dominates(Candidate, Dominatee))
|
||||
return Candidate;
|
||||
Candidates.pop_back();
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
|
||||
if (!SE->isSCEVable(I->getType()))
|
||||
return false;
|
||||
|
||||
// Dom: LHS+RHS
|
||||
// I: sext(LHS)+sext(RHS)
|
||||
// If Dom can't sign overflow and Dom dominates I, optimize I to sext(Dom).
|
||||
// TODO: handle zext
|
||||
Value *LHS = nullptr, *RHS = nullptr;
|
||||
if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS)))) ||
|
||||
match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
|
||||
if (LHS->getType() == RHS->getType()) {
|
||||
const SCEV *Key =
|
||||
SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
|
||||
if (auto *Dom = findClosestMatchingDominator(Key, I)) {
|
||||
Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
|
||||
NewSExt->takeName(I);
|
||||
I->replaceAllUsesWith(NewSExt);
|
||||
RecursivelyDeleteTriviallyDeadInstructions(I);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add I to DominatingExprs if it's an add/sub that can't sign overflow.
|
||||
if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS))) ||
|
||||
match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
|
||||
if (isKnownNotFullPoison(I)) {
|
||||
const SCEV *Key =
|
||||
SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
|
||||
DominatingExprs[Key].push_back(I);
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
|
||||
bool Changed = false;
|
||||
DominatingExprs.clear();
|
||||
for (auto Node = GraphTraits<DominatorTree *>::nodes_begin(DT);
|
||||
Node != GraphTraits<DominatorTree *>::nodes_end(DT); ++Node) {
|
||||
BasicBlock *BB = Node->getBlock();
|
||||
for (auto I = BB->begin(); I != BB->end(); ) {
|
||||
Instruction *Cur = I++;
|
||||
Changed |= reuniteExts(Cur);
|
||||
}
|
||||
}
|
||||
return Changed;
|
||||
}
|
||||
|
||||
void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
|
||||
for (auto &B : F) {
|
||||
for (auto &I : B) {
|
||||
|
@ -194,3 +194,41 @@ define void @sum_of_array4(i32 %x, i32 %y, float* nocapture %output) {
|
||||
; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
|
||||
; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
|
||||
; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33
|
||||
|
||||
|
||||
; The source code is:
|
||||
; p0 = &input[sext(x + y)];
|
||||
; p1 = &input[sext(x + (y + 5))];
|
||||
;
|
||||
; Without reuniting extensions, SeparateConstOffsetFromGEP would emit
|
||||
; p0 = &input[sext(x + y)];
|
||||
; t1 = &input[sext(x) + sext(y)];
|
||||
; p1 = &t1[5];
|
||||
;
|
||||
; With reuniting extensions, it merges p0 and t1 and thus emits
|
||||
; p0 = &input[sext(x + y)];
|
||||
; p1 = &p0[5];
|
||||
define void @reunion(i32 %x, i32 %y, float* %input) {
|
||||
; IR-LABEL: @reunion(
|
||||
; PTX-LABEL: reunion(
|
||||
entry:
|
||||
%xy = add nsw i32 %x, %y
|
||||
%0 = sext i32 %xy to i64
|
||||
%p0 = getelementptr inbounds float, float* %input, i64 %0
|
||||
%v0 = load float, float* %p0, align 4
|
||||
; PTX: ld.f32 %f{{[0-9]+}}, {{\[}}[[p0:%rd[0-9]+]]{{\]}}
|
||||
call void @use(float %v0)
|
||||
|
||||
%y5 = add nsw i32 %y, 5
|
||||
%xy5 = add nsw i32 %x, %y5
|
||||
%1 = sext i32 %xy5 to i64
|
||||
%p1 = getelementptr inbounds float, float* %input, i64 %1
|
||||
; IR: getelementptr inbounds float, float* %p0, i64 5
|
||||
%v1 = load float, float* %p1, align 4
|
||||
; PTX: ld.f32 %f{{[0-9]+}}, {{\[}}[[p0]]+20{{\]}}
|
||||
call void @use(float %v1)
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @use(float)
|
||||
|
Loading…
Reference in New Issue
Block a user