mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[AMDGPU] Tune inlining parameters for AMDGPU target (part 2)
Summary: Most IR instructions got better code-size estimates after commit 47a5c36b, so the default parameter values should be updated to improve inlining and unrolling for the target.
Reviewers: rampitec, arsenm
Reviewed By: rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, zzheng, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70391
This commit is contained in:
parent
005447e1cc
commit
2fc96e575d
@ -39,7 +39,7 @@ using namespace llvm;
|
||||
#define DEBUG_TYPE "inline"
|
||||
|
||||
static cl::opt<int>
|
||||
ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(1500),
|
||||
ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(4000),
|
||||
cl::desc("Cost of alloca argument"));
|
||||
|
||||
// If the amount of scratch memory to eliminate exceeds our ability to allocate
|
||||
|
@ -57,7 +57,7 @@ using namespace llvm;
|
||||
static cl::opt<unsigned> UnrollThresholdPrivate(
|
||||
"amdgpu-unroll-threshold-private",
|
||||
cl::desc("Unroll threshold for AMDGPU if private memory used in a loop"),
|
||||
cl::init(2000), cl::Hidden);
|
||||
cl::init(2700), cl::Hidden);
|
||||
|
||||
static cl::opt<unsigned> UnrollThresholdLocal(
|
||||
"amdgpu-unroll-threshold-local",
|
||||
|
@ -204,7 +204,7 @@ public:
|
||||
bool areInlineCompatible(const Function *Caller,
|
||||
const Function *Callee) const;
|
||||
|
||||
unsigned getInliningThresholdMultiplier() { return 9; }
|
||||
unsigned getInliningThresholdMultiplier() { return 11; }
|
||||
|
||||
int getInlinerVectorBonusPercent() { return 0; }
|
||||
|
||||
|
@ -28,8 +28,15 @@ if.end: ; preds = %if.then, %entry
|
||||
define coldcc void @foo_private_ptr2(float addrspace(5)* nocapture %p1, float addrspace(5)* nocapture %p2) {
|
||||
entry:
|
||||
%tmp1 = load float, float addrspace(5)* %p1, align 4
|
||||
%cmp = fcmp ogt float %tmp1, 1.000000e+00
|
||||
br i1 %cmp, label %if.then, label %if.end
|
||||
|
||||
if.then:
|
||||
%div = fdiv float 2.000000e+00, %tmp1
|
||||
store float %div, float addrspace(5)* %p2, align 4
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: opt -data-layout=A5 -mtriple=amdgcn-unknown-amdhsa -loop-unroll -S -amdgpu-unroll-threshold-private=12000 %s | FileCheck %s
|
||||
; RUN: opt -data-layout=A5 -mtriple=amdgcn-unknown-amdhsa -loop-unroll -S %s | FileCheck %s
|
||||
|
||||
; Check that we fully unroll the loop to be able to eliminate the alloca
|
||||
; CHECK-LABEL: @non_invariant_ind
|
||||
|
Loading…
Reference in New Issue
Block a user