mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[CodeGenPrepare] Don't sink non-cheap addrspacecasts.
Summary: Previously, CGP would unconditionally sink addrspacecast instructions, even going so far as to sink them into a loop. Now we check that the cast is "cheap", as defined by TLI. We introduce a new "is-cheap" function to TLI rather than using isNopAddrSpaceCast because some GPU platforms want the ability to ask for non-nop casts to be sunk. Reviewers: arsenm, tra Subscribers: jholewinski, wdng, llvm-commits Differential Revision: https://reviews.llvm.org/D26923 llvm-svn: 287591
This commit is contained in:
parent
864b1594a7
commit
83caad013a
@ -1153,6 +1153,12 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we
|
||||
/// are happy to sink it into basic blocks.
|
||||
// Default policy: a cast from SrcAS to DestAS is reported as "cheap" exactly
// when isNoopAddrSpaceCast(SrcAS, DestAS) returns true. The hook is virtual,
// so a subclass may override it to report additional (non-noop) casts as cheap.
virtual bool isCheapAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
|
||||
// Delegate to the no-op query; no separate cost model is applied here.
return isNoopAddrSpaceCast(SrcAS, DestAS);
|
||||
}
|
||||
|
||||
/// Return true if the pointer arguments to CI should be aligned by aligning
|
||||
/// the object whose address is being passed. If so then MinSize is set to the
|
||||
/// minimum size the object must be to be aligned and PrefAlign is set to the
|
||||
|
@ -927,6 +927,14 @@ static bool SinkCast(CastInst *CI) {
|
||||
///
|
||||
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
|
||||
const DataLayout &DL) {
|
||||
// Sink only "cheap" (or nop) address-space casts. This is a weaker condition
|
||||
// than sinking only nop casts, but is helpful on some platforms.
|
||||
if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
|
||||
if (!TLI.isCheapAddrSpaceCast(ASC->getSrcAddressSpace(),
|
||||
ASC->getDestAddressSpace()))
|
||||
return false;
|
||||
}
|
||||
|
||||
// If this is a noop copy,
|
||||
EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
|
||||
EVT DstVT = TLI.getValueType(DL, CI->getType());
|
||||
|
@ -0,0 +1,21 @@
|
||||
; Runs only the CodeGenPrepare pass over this module and verifies the
; resulting textual IR with FileCheck.
; RUN: opt -S -codegenprepare < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
|
||||
; The module is compiled for the 64-bit NVPTX (CUDA) target, so the pass
; consults that target's lowering hooks.
target triple = "nvptx64-nvidia-cuda"
|
||||
|
||||
; The address-space cast below is defined in the entry block, and its only
; user is the load in %l2. The directives in this function require the cast
; to appear before the %l1 label and to be absent from both %l1 and %l2,
; i.e. it must not have been sunk next to its user.
; CHECK-LABEL: @test
|
||||
define i64 @test(i1 %pred, i64* %ptr) {
|
||||
; CHECK: addrspacecast
|
||||
; Convert %ptr from the default address space (0) to address space 1.
%ptr_as1 = addrspacecast i64* %ptr to i64 addrspace(1)*
|
||||
br i1 %pred, label %l1, label %l2
|
||||
; %l1 loads through the original pointer and never uses the cast result.
l1:
|
||||
; CHECK-LABEL: l1:
|
||||
; CHECK-NOT: addrspacecast
|
||||
%v1 = load i64, i64* %ptr
|
||||
ret i64 %v1
|
||||
; %l2 is the sole user of %ptr_as1, so this is where a sunk copy of the cast
; would show up.
l2:
|
||||
; CHECK-LABEL: l2:
|
||||
; CHECK-NOT: addrspacecast
|
||||
%v2 = load i64, i64 addrspace(1)* %ptr_as1
|
||||
ret i64 %v2
|
||||
}
|
Loading…
Reference in New Issue
Block a user