mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
Teach InlineCost about address spaces
Summary: I basically copied this patch from here: https://reviews.llvm.org/D1251, but I skipped some of the refactoring to make the patch cleaner. The new outer3/inner3 test case in ptr-diff.ll triggers the following assert without this patch: lib/IR/Constants.cpp:1834: static llvm::Constant *llvm::ConstantExpr::getCompare(unsigned short, llvm::Constant *, llvm::Constant *, bool): Assertion `C1->getType() == C2->getType() && "Op types should be identical!"' failed. The other new test cases make sure that there is code coverage for all modifications in InlineCost.cpp (getting different values due to not fetching sizes for address space zero). I only guarantee code coverage for those tests. The tests are not written in a way that they would break without the corrections in InlineCost.cpp. I found it quite hard to fine-tune the tests into getting different results based on the pointer sizes (except for the test case where we hit an assert if not teaching InlineCost about address spaces). Reviewers: chandlerc, arsenm, haicheng Reviewed By: arsenm Subscribers: wdng, eraman, llvm-commits, haicheng Differential Revision: https://reviews.llvm.org/D40455 llvm-svn: 321809
This commit is contained in:
parent
8bc61eb94d
commit
793f08bf63
@ -371,7 +371,7 @@ void CallAnalyzer::disableLoadElimination() {
|
||||
/// Returns false if unable to compute the offset for any reason. Respects any
|
||||
/// simplified values known during the analysis of this callsite.
|
||||
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
|
||||
unsigned IntPtrWidth = DL.getPointerSizeInBits();
|
||||
unsigned IntPtrWidth = DL.getPointerTypeSizeInBits(GEP.getType());
|
||||
assert(IntPtrWidth == Offset.getBitWidth());
|
||||
|
||||
for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
|
||||
@ -450,8 +450,12 @@ bool CallAnalyzer::visitPHI(PHINode &I) {
|
||||
// SROA if it *might* be used in an inappropriate manner.
|
||||
|
||||
// Phi nodes are always zero-cost.
|
||||
|
||||
APInt ZeroOffset = APInt::getNullValue(DL.getPointerSizeInBits());
|
||||
// FIXME: Pointer sizes may differ between different address spaces, so do we
|
||||
// need to use correct address space in the call to getPointerSizeInBits here?
|
||||
// Or could we skip the getPointerSizeInBits call completely? As far as I can
|
||||
// see the ZeroOffset is used as a dummy value, so we can probably use any
|
||||
// bit width for the ZeroOffset?
|
||||
APInt ZeroOffset = APInt::getNullValue(DL.getPointerSizeInBits(0));
|
||||
bool CheckSROA = I.getType()->isPointerTy();
|
||||
|
||||
// Track the constant or pointer with constant offset we've seen so far.
|
||||
@ -641,7 +645,8 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
|
||||
// Track base/offset pairs when converted to a plain integer provided the
|
||||
// integer is large enough to represent the pointer.
|
||||
unsigned IntegerSize = I.getType()->getScalarSizeInBits();
|
||||
if (IntegerSize >= DL.getPointerSizeInBits()) {
|
||||
unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace();
|
||||
if (IntegerSize >= DL.getPointerSizeInBits(AS)) {
|
||||
std::pair<Value *, APInt> BaseAndOffset =
|
||||
ConstantOffsetPtrs.lookup(I.getOperand(0));
|
||||
if (BaseAndOffset.first)
|
||||
@ -674,7 +679,7 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
|
||||
// modifications provided the integer is not too large.
|
||||
Value *Op = I.getOperand(0);
|
||||
unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
|
||||
if (IntegerSize <= DL.getPointerSizeInBits()) {
|
||||
if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) {
|
||||
std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
|
||||
if (BaseAndOffset.first)
|
||||
ConstantOffsetPtrs[&I] = BaseAndOffset;
|
||||
@ -1608,7 +1613,8 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
|
||||
if (!V->getType()->isPointerTy())
|
||||
return nullptr;
|
||||
|
||||
unsigned IntPtrWidth = DL.getPointerSizeInBits();
|
||||
unsigned AS = V->getType()->getPointerAddressSpace();
|
||||
unsigned IntPtrWidth = DL.getPointerSizeInBits(AS);
|
||||
APInt Offset = APInt::getNullValue(IntPtrWidth);
|
||||
|
||||
// Even though we don't look through PHI nodes, we could be called on an
|
||||
@ -1632,7 +1638,7 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
|
||||
assert(V->getType()->isPointerTy() && "Unexpected operand type!");
|
||||
} while (Visited.insert(V).second);
|
||||
|
||||
Type *IntPtrTy = DL.getIntPtrType(V->getContext());
|
||||
Type *IntPtrTy = DL.getIntPtrType(V->getContext(), AS);
|
||||
return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
|
||||
}
|
||||
|
||||
@ -1904,7 +1910,8 @@ int llvm::getCallsiteCost(CallSite CS, const DataLayout &DL) {
|
||||
// size of the byval type by the target's pointer size.
|
||||
PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
|
||||
unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
|
||||
unsigned PointerSize = DL.getPointerSizeInBits();
|
||||
unsigned AS = PTy->getAddressSpace();
|
||||
unsigned PointerSize = DL.getPointerSizeInBits(AS);
|
||||
// Ceiling division.
|
||||
unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
|
||||
|
||||
|
@ -1,6 +1,8 @@
|
||||
; RUN: opt < %s -inline -S | FileCheck %s
|
||||
; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s
|
||||
|
||||
target datalayout = "p:32:32-p1:64:64-p2:16:16-n16:32:64"
|
||||
|
||||
; Inlining a byval struct should cause an explicit copy into an alloca.
|
||||
|
||||
%struct.ss = type { i32, i64 }
|
||||
@ -128,3 +130,27 @@ entry:
|
||||
; CHECK: store i32 0, i32* getelementptr inbounds (%struct.S0, %struct.S0* @b, i64 0, i32 0), align 4
|
||||
; CHECK-NOT: load i32, i32* getelementptr inbounds (%struct.S0, %struct.S0* @b, i64 0, i32 0), align 4
|
||||
}
|
||||
|
||||
%struct.S1 = type { i32 }
|
||||
|
||||
@d = addrspace(1) global %struct.S1 { i32 1 }, align 4
|
||||
@c = common addrspace(1) global i32 0, align 4
|
||||
|
||||
define internal void @f5_as1(%struct.S1 addrspace(1)* byval nocapture readonly align 4 %p) {
|
||||
entry:
|
||||
store i32 0, i32 addrspace(1)* getelementptr inbounds (%struct.S1, %struct.S1 addrspace(1)* @d, i64 0, i32 0), align 4
|
||||
%f2 = getelementptr inbounds %struct.S1, %struct.S1 addrspace(1)* %p, i64 0, i32 0
|
||||
%0 = load i32, i32 addrspace(1)* %f2, align 4
|
||||
store i32 %0, i32 addrspace(1)* @c, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define i32 @test5_as1() {
|
||||
entry:
|
||||
tail call void @f5_as1(%struct.S1 addrspace(1)* byval align 4 @d)
|
||||
%0 = load i32, i32 addrspace(1)* @c, align 4
|
||||
ret i32 %0
|
||||
; CHECK: @test5_as1()
|
||||
; CHECK: store i32 0, i32 addrspace(1)* getelementptr inbounds (%struct.S1, %struct.S1 addrspace(1)* @d, i64 0, i32 0), align 4
|
||||
; CHECK-NOT: load i32, i32 addrspace(1)* getelementptr inbounds (%struct.S1, %struct.S1 addrspace(1)* @d, i64 0, i32 0), align 4
|
||||
}
|
||||
|
@ -30,6 +30,33 @@ else:
|
||||
ret i32 %t
|
||||
}
|
||||
|
||||
define i32 @outer1_as1(i32 addrspace(1)* %ptr) {
|
||||
; CHECK-LABEL: @outer1_as1(
|
||||
; CHECK-NOT: call
|
||||
; CHECK: ret i32
|
||||
%ptr1 = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i32 0
|
||||
%ptr2 = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i32 42
|
||||
%result = call i32 @inner1_as1(i32 addrspace(1)* %ptr1, i32 addrspace(1)* %ptr2)
|
||||
ret i32 %result
|
||||
}
|
||||
|
||||
; Make sure that the address space's larger size makes the ptrtoints
|
||||
; not no-ops preventing inlining
|
||||
define i32 @inner1_as1(i32 addrspace(1)* %begin, i32 addrspace(1)* %end) {
|
||||
%begin.i = ptrtoint i32 addrspace(1)* %begin to i32
|
||||
%end.i = ptrtoint i32 addrspace(1)* %end to i32
|
||||
%distance = sub i32 %end.i, %begin.i
|
||||
%icmp = icmp sle i32 %distance, 42
|
||||
br i1 %icmp, label %then, label %else
|
||||
|
||||
then:
|
||||
ret i32 3
|
||||
|
||||
else:
|
||||
%t = load i32, i32 addrspace(1)* %begin
|
||||
ret i32 %t
|
||||
}
|
||||
|
||||
define i32 @outer2(i32* %ptr) {
|
||||
; Test that an inbounds GEP disables this -- it isn't safe in general as
|
||||
; wrapping changes the behavior of lessthan and greaterthan comparisons.
|
||||
@ -59,6 +86,30 @@ else:
|
||||
ret i32 %t
|
||||
}
|
||||
|
||||
define i32 @outer3(i16* addrspace(1)* %ptr) {
|
||||
; CHECK-LABEL: @outer3(
|
||||
; CHECK-NOT: call i32
|
||||
; CHECK: ret i32 3
|
||||
; CHECK-LABEL: @inner3(
|
||||
%result = call i32 @inner3(i16* addrspace(1)* %ptr)
|
||||
ret i32 %result
|
||||
}
|
||||
|
||||
define i32 @inner3(i16* addrspace(1)* %ptr) {
|
||||
call void @extern()
|
||||
%ptr.i = ptrtoint i16* addrspace(1)* %ptr to i64
|
||||
%distance = sub i64 %ptr.i, %ptr.i
|
||||
%icmp = icmp eq i64 %distance, 0
|
||||
br i1 %icmp, label %then, label %else
|
||||
|
||||
then:
|
||||
ret i32 3
|
||||
|
||||
else:
|
||||
ret i32 5
|
||||
}
|
||||
|
||||
|
||||
; The inttoptrs are free since it is a smaller integer to a larger
|
||||
; pointer size
|
||||
define i32 @inttoptr_free_cost(i32 %a, i32 %b, i32 %c) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user