From 2e6ad7c3a0a2fa16ae32c87eb13a675f637f3b7f Mon Sep 17 00:00:00 2001 From: Haicheng Wu Date: Fri, 20 Jan 2017 18:51:22 +0000 Subject: [PATCH] Recommit "[InlineCost] Use TTI to check if GEP is free." #3 This is the third attemp to recommit r292526. The original summary: Currently, a GEP is considered free only if its indices are all constant. TTI::getGEPCost() can give target-specific more accurate analysis. TTI is already used for the cost of many other instructions. llvm-svn: 292633 --- lib/Analysis/InlineCost.cpp | 20 +++++++++++-- test/Transforms/Inline/AArch64/gep-cost.ll | 30 ++++++++++++++++++++ test/Transforms/Inline/AArch64/lit.local.cfg | 2 ++ 3 files changed, 50 insertions(+), 2 deletions(-) create mode 100644 test/Transforms/Inline/AArch64/gep-cost.ll create mode 100644 test/Transforms/Inline/AArch64/lit.local.cfg diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 4109049ecab..b6c3997dae3 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -134,6 +134,7 @@ class CallAnalyzer : public InstVisitor { void accumulateSROACost(DenseMap::iterator CostIt, int InstructionCost); bool isGEPOffsetConstant(GetElementPtrInst &GEP); + bool isGEPFree(GetElementPtrInst &GEP); bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); bool simplifyCallSite(Function *F, CallSite CS); ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V); @@ -331,6 +332,21 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { return true; } +/// \brief Use TTI to check whether a GEP is free. +/// +/// Respects any simplified values known during the analysis of this callsite. +bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) { + SmallVector Indices; + for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I) + if (Constant *SimpleOp = SimplifiedValues.lookup(*I)) + Indices.push_back(SimpleOp); + else + Indices.push_back(*I); + return TargetTransformInfo::TCC_Free == + TTI.getGEPCost(GEP.getSourceElementType(), GEP.getPointerOperand(), + Indices); +} + bool CallAnalyzer::visitAlloca(AllocaInst &I) { // Check whether inlining will turn a dynamic alloca into a static // alloca and handle that case. @@ -396,7 +412,7 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { // Non-constant GEPs aren't folded, and disable SROA. if (SROACandidate) disableSROA(CostIt); - return false; + return isGEPFree(I); } // Add the result as a new mapping to Base + Offset. @@ -422,7 +438,7 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { // Variable GEPs will require math and will disable SROA. if (SROACandidate) disableSROA(CostIt); - return false; + return isGEPFree(I); } bool CallAnalyzer::visitBitCast(BitCastInst &I) { diff --git a/test/Transforms/Inline/AArch64/gep-cost.ll b/test/Transforms/Inline/AArch64/gep-cost.ll new file mode 100644 index 00000000000..204958f082d --- /dev/null +++ b/test/Transforms/Inline/AArch64/gep-cost.ll @@ -0,0 +1,30 @@ +; REQUIRES: asserts +; RUN: opt -inline -mtriple=aarch64--linux-gnu -mcpu=kryo -S -debug-only=inline-cost < %s 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-gnu" + +define void @outer([4 x i32]* %ptr, i32 %i) { + call void @inner1([4 x i32]* %ptr, i32 %i) + call void @inner2([4 x i32]* %ptr, i32 %i) + ret void +} +; The gep in inner1() is reg+reg, which is a legal addressing mode for AArch64. +; Thus, both the gep and ret can be simplified. +; CHECK: Analyzing call of inner1 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 2 +define void @inner1([4 x i32]* %ptr, i32 %i) { + %G = getelementptr inbounds [4 x i32], [4 x i32]* %ptr, i32 0, i32 %i + ret void +} + +; The gep in inner2() is reg+imm+reg, which is not a legal addressing mode for +; AArch64. Thus, only the ret can be simplified and not the gep. +; CHECK: Analyzing call of inner2 +; CHECK: NumInstructionsSimplified: 1 +; CHECK: NumInstructions: 2 +define void @inner2([4 x i32]* %ptr, i32 %i) { + %G = getelementptr inbounds [4 x i32], [4 x i32]* %ptr, i32 1, i32 %i + ret void +} diff --git a/test/Transforms/Inline/AArch64/lit.local.cfg b/test/Transforms/Inline/AArch64/lit.local.cfg new file mode 100644 index 00000000000..7184443994b --- /dev/null +++ b/test/Transforms/Inline/AArch64/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'AArch64' in config.root.targets: + config.unsupported = True