
Support addrspacecast initializers with isNoopAddrSpaceCast

Moves isNoopAddrSpaceCast to the TargetMachine. It logically belongs
with the DataLayout.
Matt Arsenault 2016-05-27 15:50:12 -07:00 committed by Matt Arsenault
parent d6cafdd330
commit 4eb4bb060f
30 changed files with 131 additions and 78 deletions
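The practical effect is easiest to see in the AsmPrinter change further down: global initializers are lowered with no function, and therefore no TargetLowering, in scope, so the no-op query has to be answerable from the TargetMachine alone. Below is a minimal sketch of that path, not the committed code; lowerInitializerCast is a hypothetical helper name, and it assumes the TargetMachine::isNoopAddrSpaceCast hook this commit introduces.

// Sketch only: assumes the TargetMachine::isNoopAddrSpaceCast hook added by
// this commit; lowerInitializerCast is a hypothetical helper, not LLVM API.
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/IR/Constants.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;

static const MCExpr *lowerInitializerCast(AsmPrinter &AP, const ConstantExpr *CE) {
  const Constant *Op = CE->getOperand(0);
  unsigned SrcAS = Op->getType()->getPointerAddressSpace();
  unsigned DstAS = CE->getType()->getPointerAddressSpace();
  // There is no MachineFunction (and so no TargetLowering) when emitting a
  // global initializer; the query must go through the TargetMachine.
  if (AP.TM.isNoopAddrSpaceCast(SrcAS, DstAS))
    return AP.lowerConstant(Op); // emit the source pointer unchanged
  report_fatal_error("Unsupported expression in static initializer");
}

The new tests at the end exercise both sides of this: the passing test folds global-to-flat and constant-to-flat casts in initializers, while the error test covers the still-unsupported case of a cast from the local (LDS) address space.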

View File

@ -222,7 +222,7 @@ public:
}
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
- return getTLI()->isNoopAddrSpaceCast(FromAS, ToAS);
+ return getTLI()->getTargetMachine().isNoopAddrSpaceCast(FromAS, ToAS);
}
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,

View File

@ -1759,17 +1759,10 @@ public:
return "";
}
- /// Returns true if a cast between SrcAS and DestAS is a noop.
- virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
- return false;
- }
/// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we
/// are happy to sink it into basic blocks. A cast may be free, but not
/// necessarily a no-op. e.g. a free truncate from a 64-bit to 32-bit pointer.
- virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
- return isNoopAddrSpaceCast(SrcAS, DestAS);
- }
+ virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const;
/// Return true if the pointer arguments to CI should be aligned by aligning
/// the object whose address is being passed. If so then MinSize is set to the

View File

@ -271,6 +271,11 @@ public:
return Options.BBSectionsFuncListBuf.get();
}
+ /// Returns true if a cast between SrcAS and DestAS is a noop.
+ virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
+ return false;
+ }
/// Get a \c TargetIRAnalysis appropriate for the target.
///
/// This is used to construct the new pass manager's target IR analysis pass,

View File

@ -2295,6 +2295,16 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
}
switch (CE->getOpcode()) {
+ case Instruction::AddrSpaceCast: {
+ const Constant *Op = CE->getOperand(0);
+ unsigned DstAS = CE->getType()->getPointerAddressSpace();
+ unsigned SrcAS = Op->getType()->getPointerAddressSpace();
+ if (TM.isNoopAddrSpaceCast(SrcAS, DstAS))
+ return lowerConstant(Op);
+ // Fallthrough to error.
+ LLVM_FALLTHROUGH;
+ }
default: {
// If the code isn't optimized, there may be outstanding folding
// opportunities. Attempt to fold the expression using DataLayout as a

View File

@ -4322,7 +4322,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
unsigned SrcAS
= AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
- if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
+ if (TLI.getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS))
return matchAddr(AddrInst->getOperand(0), Depth);
return false;
}

View File

@ -6394,7 +6394,7 @@ static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI,
unsigned AS) {
// Lowering memcpy / memset / memmove intrinsics to calls is only valid if all
// pointer operands can be losslessly bitcasted to pointers of address space 0
- if (AS != 0 && !TLI->isNoopAddrSpaceCast(AS, 0)) {
+ if (AS != 0 && !TLI->getTargetMachine().isNoopAddrSpaceCast(AS, 0)) {
report_fatal_error("cannot lower memory intrinsic in address space " +
Twine(AS));
}

View File

@ -3425,7 +3425,7 @@ void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
unsigned SrcAS = SV->getType()->getPointerAddressSpace();
unsigned DestAS = I.getType()->getPointerAddressSpace();
- if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
+ if (!TM.isNoopAddrSpaceCast(SrcAS, DestAS))
N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS);
setValue(&I, N);

View File

@ -801,6 +801,11 @@ bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const {
}
}
+ bool TargetLoweringBase::isFreeAddrSpaceCast(unsigned SrcAS,
+ unsigned DestAS) const {
+ return TM.isNoopAddrSpaceCast(SrcAS, DestAS);
+ }
void TargetLoweringBase::setJumpIsExpensive(bool isExpensive) {
// If the command-line option was specified, ignore this request.
if (!JumpIsExpensiveOverride.getNumOccurrences())

View File

@ -462,12 +462,6 @@ public:
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
- /// Returns true if a cast between SrcAS and DestAS is a noop.
- bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
- // Addrspacecasts are always noops.
- return true;
- }
/// This method returns a target specific FastISel object, or null if the
/// target does not support "fast" ISel.
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,

View File

@ -57,6 +57,12 @@ public:
SMDiagnostic &Error,
SMRange &SourceRange) const override;
+ /// Returns true if a cast between SrcAS and DestAS is a noop.
+ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
+ // Addrspacecasts are always noops.
+ return true;
+ }
private:
bool isLittle;
};

View File

@ -281,8 +281,6 @@ enum TargetIndex {
};
}
- } // End namespace llvm
/// OpenCL uses address spaces to differentiate between
/// various memory regions on the hardware. On the CPU
/// all of the address spaces point to the same memory,
@ -339,4 +337,17 @@ namespace AMDGPUAS {
};
}
+ namespace AMDGPU {
+ // FIXME: Missing constant_32bit
+ inline bool isFlatGlobalAddrSpace(unsigned AS) {
+ return AS == AMDGPUAS::GLOBAL_ADDRESS ||
+ AS == AMDGPUAS::FLAT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS ||
+ AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
+ }
+ }
+ } // End namespace llvm
#endif

View File

@ -1677,8 +1677,7 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
const AMDGPUTargetMachine &TM
= static_cast<const AMDGPUTargetMachine &>(MF.getTarget());
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- if (ST.getTargetLowering()->isNoopAddrSpaceCast(SrcAS, DestAS)) {
+ if (TM.isNoopAddrSpaceCast(SrcAS, DestAS)) {
MI.setDesc(B.getTII().get(TargetOpcode::G_BITCAST));
return true;
}
@ -2251,8 +2250,7 @@ bool AMDGPULegalizerInfo::legalizeAtomicCmpXChg(
Register CmpVal = MI.getOperand(2).getReg();
Register NewVal = MI.getOperand(3).getReg();
- assert(SITargetLowering::isFlatGlobalAddrSpace(
- MRI.getType(PtrReg).getAddressSpace()) &&
+ assert(AMDGPU::isFlatGlobalAddrSpace(MRI.getType(PtrReg).getAddressSpace()) &&
"this should not have been custom lowered");
LLT ValTy = MRI.getType(CmpVal);

View File

@ -3232,7 +3232,7 @@ AMDGPURegisterBankInfo::getValueMappingForPtr(const MachineRegisterInfo &MRI,
LLT PtrTy = MRI.getType(PtrReg);
unsigned Size = PtrTy.getSizeInBits();
if (Subtarget.useFlatForGlobal() ||
- !SITargetLowering::isFlatGlobalAddrSpace(PtrTy.getAddressSpace()))
+ !AMDGPU::isFlatGlobalAddrSpace(PtrTy.getAddressSpace()))
return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
// If we're using MUBUF instructions for global memory, an SGPR base register
@ -3258,8 +3258,7 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
const RegisterBank *PtrBank = getRegBank(PtrReg, MRI, *TRI);
- if (PtrBank == &AMDGPU::SGPRRegBank &&
- SITargetLowering::isFlatGlobalAddrSpace(AS)) {
+ if (PtrBank == &AMDGPU::SGPRRegBank && AMDGPU::isFlatGlobalAddrSpace(AS)) {
if (isScalarLoadLegal(MI)) {
// We have a uniform instruction so we want to use an SMRD load
ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);

View File

@ -526,6 +526,12 @@ const R600Subtarget *R600TargetMachine::getSubtargetImpl(
return I.get();
}
+ bool AMDGPUTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
+ unsigned DestAS) const {
+ return AMDGPU::isFlatGlobalAddrSpace(SrcAS) &&
+ AMDGPU::isFlatGlobalAddrSpace(DestAS);
+ }
TargetTransformInfo
R600TargetMachine::getTargetTransformInfo(const Function &F) {
return TargetTransformInfo(R600TTIImpl(this, F));

View File

@ -62,6 +62,8 @@ public:
AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
AddrSpace == AMDGPUAS::REGION_ADDRESS) ? -1 : 0;
}
+ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
};
//===----------------------------------------------------------------------===//

View File

@ -934,7 +934,10 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
Type *MaskTy = MaskOp->getType();
bool DoTruncate = false;
- if (!getTLI()->isNoopAddrSpaceCast(OldAS, NewAS)) {
+ const GCNTargetMachine &TM =
+ static_cast<const GCNTargetMachine &>(getTLI()->getTargetMachine());
+ if (!TM.isNoopAddrSpaceCast(OldAS, NewAS)) {
// All valid 64-bit to 32-bit casts work by chopping off the high
// bits. Any masking only clearing the low bits will also apply in the new
// address space.

View File

@ -1478,11 +1478,6 @@ EVT SITargetLowering::getOptimalMemOpType(
return MVT::Other;
}
- bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
- unsigned DestAS) const {
- return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS);
- }
bool SITargetLowering::isMemOpHasNoClobberedMemOperand(const SDNode *N) const {
const MemSDNode *MemNode = cast<MemSDNode>(N);
const Value *Ptr = MemNode->getMemOperand()->getValue();
@ -1497,7 +1492,9 @@ bool SITargetLowering::isFreeAddrSpaceCast(unsigned SrcAS,
if (SrcAS == AMDGPUAS::FLAT_ADDRESS)
return true;
- return isNoopAddrSpaceCast(SrcAS, DestAS);
+ const GCNTargetMachine &TM =
+ static_cast<const GCNTargetMachine &>(getTargetMachine());
+ return TM.isNoopAddrSpaceCast(SrcAS, DestAS);
}
bool SITargetLowering::isMemOpUniform(const SDNode *N) const {
@ -2285,8 +2282,10 @@ SDValue SITargetLowering::LowerFormalArguments(
if (Arg.Flags.isByRef()) {
SDValue Ptr = lowerKernArgParameterPtr(DAG, DL, Chain, Offset);
- if (!isNoopAddrSpaceCast(AMDGPUAS::CONSTANT_ADDRESS,
- Arg.Flags.getPointerAddrSpace())) {
+ const GCNTargetMachine &TM =
+ static_cast<const GCNTargetMachine &>(getTargetMachine());
+ if (!TM.isNoopAddrSpaceCast(AMDGPUAS::CONSTANT_ADDRESS,
+ Arg.Flags.getPointerAddrSpace())) {
Ptr = DAG.getAddrSpaceCast(DL, VT, Ptr, AMDGPUAS::CONSTANT_ADDRESS,
Arg.Flags.getPointerAddrSpace());
}
@ -8506,7 +8505,7 @@ SDValue SITargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) co
unsigned AS = AtomicNode->getAddressSpace();
// No custom lowering required for local address space
- if (!isFlatGlobalAddrSpace(AS))
+ if (!AMDGPU::isFlatGlobalAddrSpace(AS))
return Op;
// Non-local address space requires custom lowering for atomic compare

View File

@ -275,15 +275,6 @@ public:
AS == AMDGPUAS::PRIVATE_ADDRESS;
}
- // FIXME: Missing constant_32bit
- static bool isFlatGlobalAddrSpace(unsigned AS) {
- return AS == AMDGPUAS::GLOBAL_ADDRESS ||
- AS == AMDGPUAS::FLAT_ADDRESS ||
- AS == AMDGPUAS::CONSTANT_ADDRESS ||
- AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
- }
- bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
TargetLoweringBase::LegalizeTypeAction

View File

@ -528,12 +528,6 @@ class VectorType;
const TargetRegisterClass *
getRegClassFor(MVT VT, bool isDivergent = false) const override;
- /// Returns true if a cast between SrcAS and DestAS is a noop.
- bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
- // Addrspacecasts are always noops.
- return true;
- }
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
unsigned &PrefAlign) const override;

View File

@ -72,6 +72,12 @@ public:
}
bool targetSchedulesPostRAScheduling() const override { return true; };
+ /// Returns true if a cast between SrcAS and DestAS is a noop.
+ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
+ // Addrspacecasts are always noops.
+ return true;
+ }
};
/// ARM/Thumb little endian target machine.

View File

@ -365,14 +365,6 @@ class TargetRegisterClass;
return ABI.IsN64() ? Mips::A1_64 : Mips::A1;
}
- /// Returns true if a cast between SrcAS and DestAS is a noop.
- bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
- // Mips doesn't have any special address spaces so we just reserve
- // the first 256 for software use (e.g. OpenCL) and treat casts
- // between them as noops.
- return SrcAS < 256 && DestAS < 256;
- }
bool isJumpTableRelative() const override {
return getTargetMachine().isPositionIndependent();
}

View File

@ -63,6 +63,14 @@ public:
return TLOF.get();
}
+ /// Returns true if a cast between SrcAS and DestAS is a noop.
+ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
+ // Mips doesn't have any special address spaces so we just reserve
+ // the first 256 for software use (e.g. OpenCL) and treat casts
+ // between them as noops.
+ return SrcAS < 256 && DestAS < 256;
+ }
bool isLittleEndian() const { return isLittle; }
const MipsABIInfo &getABI() const { return ABI; }
};

View File

@ -1022,11 +1022,6 @@ namespace llvm {
}
};
- bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
- // Addrspacecasts are always noops.
- return true;
- }
bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI,
SelectionDAG &DAG,
ISD::LoadExtType ET = ISD::NON_EXTLOAD) const;

View File

@ -58,6 +58,11 @@ public:
const Triple &TT = getTargetTriple();
return (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le);
};
+ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
+ // Addrspacecasts are always noops.
+ return true;
+ }
};
} // end namespace llvm

View File

@ -2537,17 +2537,6 @@ Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
return TargetLowering::getSafeStackPointerLocation(IRB);
}
- bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
- unsigned DestAS) const {
- assert(SrcAS != DestAS && "Expected different address spaces!");
- const TargetMachine &TM = getTargetMachine();
- if (TM.getPointerSize(SrcAS) != TM.getPointerSize(DestAS))
- return false;
- return SrcAS < 256 && DestAS < 256;
- }
//===----------------------------------------------------------------------===//
// Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//

View File

@ -1349,8 +1349,6 @@ namespace llvm {
Align Alignment,
SelectionDAG &DAG) const;
- bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
/// Customize the preferred legalization strategy for certain types.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;

View File

@ -311,6 +311,14 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const {
return I.get();
}
+ bool X86TargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
+ unsigned DestAS) const {
+ assert(SrcAS != DestAS && "Expected different address spaces!");
+ if (getPointerSize(SrcAS) != getPointerSize(DestAS))
+ return false;
+ return SrcAS < 256 && DestAS < 256;
+ }
//===----------------------------------------------------------------------===//
// X86 TTI query.
//===----------------------------------------------------------------------===//

View File

@ -54,6 +54,8 @@ public:
}
bool isJIT() const { return IsJIT; }
+ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
};
} // end namespace llvm

View File

@ -0,0 +1,7 @@
; RUN: not --crash llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR %s
; ERROR: LLVM ERROR: Unsupported expression in static initializer: addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*)
@lds.arr = unnamed_addr addrspace(3) global [256 x i32] undef, align 4
@gv_flatptr_from_lds = unnamed_addr addrspace(2) global i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4

View File

@ -0,0 +1,27 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
; CHECK: global.arr:
; CHECK: .zero 1024
; CHECK: .size global.arr, 1024
; CHECK: gv_flatptr_from_global:
; CHECK: .quad global.arr+32
; CHECK: .size gv_flatptr_from_global, 8
; CHECK: gv_global_ptr:
; CHECK: .quad global.arr+32
; CHECK: .size gv_global_ptr, 8
; CHECK: gv_flatptr_from_constant:
; CHECK: .quad constant.arr+32
; CHECK: .size gv_flatptr_from_constant, 8
@global.arr = unnamed_addr addrspace(1) global [256 x i32] undef, align 4
@constant.arr = external unnamed_addr addrspace(4) global [256 x i32], align 4
@gv_flatptr_from_global = unnamed_addr addrspace(4) global i32 addrspace(0)* getelementptr ([256 x i32], [256 x i32] addrspace(0)* addrspacecast ([256 x i32] addrspace(1)* @global.arr to [256 x i32] addrspace(0)*), i64 0, i64 8), align 4
@gv_global_ptr = unnamed_addr addrspace(4) global i32 addrspace(1)* getelementptr ([256 x i32], [256 x i32] addrspace(1)* @global.arr, i64 0, i64 8), align 4
@gv_flatptr_from_constant = unnamed_addr addrspace(4) global i32 addrspace(0)* getelementptr ([256 x i32], [256 x i32] addrspace(0)* addrspacecast ([256 x i32] addrspace(4)* @constant.arr to [256 x i32] addrspace(0)*), i64 0, i64 8), align 4