mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 12:43:36 +01:00
Recommit "[InlineCost] Use TTI to check if GEP is free." #3
This is the third attempt to recommit r292526. The original summary: Currently, a GEP is considered free only if its indices are all constant. TTI::getGEPCost() can give a more accurate, target-specific analysis. TTI is already used for the cost of many other instructions. llvm-svn: 292633
This commit is contained in:
parent
f9f1030204
commit
2e6ad7c3a0
@ -134,6 +134,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
|
|||||||
void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
|
void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
|
||||||
int InstructionCost);
|
int InstructionCost);
|
||||||
bool isGEPOffsetConstant(GetElementPtrInst &GEP);
|
bool isGEPOffsetConstant(GetElementPtrInst &GEP);
|
||||||
|
bool isGEPFree(GetElementPtrInst &GEP);
|
||||||
bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
|
bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
|
||||||
bool simplifyCallSite(Function *F, CallSite CS);
|
bool simplifyCallSite(Function *F, CallSite CS);
|
||||||
ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
|
ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
|
||||||
@ -331,6 +332,21 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// \brief Use TTI to check whether a GEP is free.
|
||||||
|
///
|
||||||
|
/// Respects any simplified values known during the analysis of this callsite.
|
||||||
|
bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
|
||||||
|
SmallVector<Value *, 4> Indices;
|
||||||
|
for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
|
||||||
|
if (Constant *SimpleOp = SimplifiedValues.lookup(*I))
|
||||||
|
Indices.push_back(SimpleOp);
|
||||||
|
else
|
||||||
|
Indices.push_back(*I);
|
||||||
|
return TargetTransformInfo::TCC_Free ==
|
||||||
|
TTI.getGEPCost(GEP.getSourceElementType(), GEP.getPointerOperand(),
|
||||||
|
Indices);
|
||||||
|
}
|
||||||
|
|
||||||
bool CallAnalyzer::visitAlloca(AllocaInst &I) {
|
bool CallAnalyzer::visitAlloca(AllocaInst &I) {
|
||||||
// Check whether inlining will turn a dynamic alloca into a static
|
// Check whether inlining will turn a dynamic alloca into a static
|
||||||
// alloca and handle that case.
|
// alloca and handle that case.
|
||||||
@ -396,7 +412,7 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
|
|||||||
// Non-constant GEPs aren't folded, and disable SROA.
|
// Non-constant GEPs aren't folded, and disable SROA.
|
||||||
if (SROACandidate)
|
if (SROACandidate)
|
||||||
disableSROA(CostIt);
|
disableSROA(CostIt);
|
||||||
return false;
|
return isGEPFree(I);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add the result as a new mapping to Base + Offset.
|
// Add the result as a new mapping to Base + Offset.
|
||||||
@ -422,7 +438,7 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
|
|||||||
// Variable GEPs will require math and will disable SROA.
|
// Variable GEPs will require math and will disable SROA.
|
||||||
if (SROACandidate)
|
if (SROACandidate)
|
||||||
disableSROA(CostIt);
|
disableSROA(CostIt);
|
||||||
return false;
|
return isGEPFree(I);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool CallAnalyzer::visitBitCast(BitCastInst &I) {
|
bool CallAnalyzer::visitBitCast(BitCastInst &I) {
|
||||||
|
30
test/Transforms/Inline/AArch64/gep-cost.ll
Normal file
30
test/Transforms/Inline/AArch64/gep-cost.ll
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
; REQUIRES: asserts
|
||||||
|
; RUN: opt -inline -mtriple=aarch64--linux-gnu -mcpu=kryo -S -debug-only=inline-cost < %s 2>&1 | FileCheck %s
|
||||||
|
|
||||||
|
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
||||||
|
target triple = "aarch64--linux-gnu"
|
||||||
|
|
||||||
|
define void @outer([4 x i32]* %ptr, i32 %i) {
|
||||||
|
call void @inner1([4 x i32]* %ptr, i32 %i)
|
||||||
|
call void @inner2([4 x i32]* %ptr, i32 %i)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
; The gep in inner1() is reg+reg, which is a legal addressing mode for AArch64.
|
||||||
|
; Thus, both the gep and ret can be simplified.
|
||||||
|
; CHECK: Analyzing call of inner1
|
||||||
|
; CHECK: NumInstructionsSimplified: 2
|
||||||
|
; CHECK: NumInstructions: 2
|
||||||
|
define void @inner1([4 x i32]* %ptr, i32 %i) {
|
||||||
|
%G = getelementptr inbounds [4 x i32], [4 x i32]* %ptr, i32 0, i32 %i
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; The gep in inner2() is reg+imm+reg, which is not a legal addressing mode for
|
||||||
|
; AArch64. Thus, only the ret can be simplified and not the gep.
|
||||||
|
; CHECK: Analyzing call of inner2
|
||||||
|
; CHECK: NumInstructionsSimplified: 1
|
||||||
|
; CHECK: NumInstructions: 2
|
||||||
|
define void @inner2([4 x i32]* %ptr, i32 %i) {
|
||||||
|
%G = getelementptr inbounds [4 x i32], [4 x i32]* %ptr, i32 1, i32 %i
|
||||||
|
ret void
|
||||||
|
}
|
2
test/Transforms/Inline/AArch64/lit.local.cfg
Normal file
2
test/Transforms/Inline/AArch64/lit.local.cfg
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
# Mark every test in this directory as unsupported when the AArch64 target
# is not among the configured targets.
if 'AArch64' not in config.root.targets:
    config.unsupported = True
|
Loading…
Reference in New Issue
Block a user