[NVPTX] Added a feature to use short pointers for const/local/shared AS.
Const/local/shared address spaces are all < 4GB, so we can always use 32-bit pointers to access them. This has a substantial performance impact on kernels that use shared memory for intermediate results.

The feature is disabled by default.

Differential Revision: https://reviews.llvm.org/D46147

llvm-svn: 331941
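As a quick illustration of what the flag changes, here is a hedged sketch (the kernel below is illustrative and not part of this commit; the --nvptx-short-ptr flag and the addrspace numbering are taken from the tests updated further down):

; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 --nvptx-short-ptr
define i32 @read_shared(i32 addrspace(3)* %p) {
  ; addrspace(3) is PTX .shared; with the flag the pointer stays 32-bit,
  ; so the load is addressed with a 32-bit register:
  ;   ld.shared.u32 %r{{.*}}, [%r{{.*}}]
  %v = load i32, i32 addrspace(3)* %p
  ret i32 %v
}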
@@ -66,6 +66,10 @@ bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
  return TL->allowUnsafeFPMath(*MF);
}

bool NVPTXDAGToDAGISel::useShortPointers() const {
  return TM.useShortPointers();
}

/// Select - Select instructions not customized! Used for
/// expanded, promoted and normal instructions.
void NVPTXDAGToDAGISel::Select(SDNode *N) {
@@ -732,7 +736,6 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
  AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
  unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
  unsigned DstAddrSpace = CastN->getDestAddressSpace();

  assert(SrcAddrSpace != DstAddrSpace &&
         "addrspacecast must be between different address spaces");
@@ -745,13 +748,19 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
    Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
    break;
  case ADDRESS_SPACE_SHARED:
    Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
    Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_shared_yes_6432
                                             : NVPTX::cvta_shared_yes_64)
                       : NVPTX::cvta_shared_yes;
    break;
  case ADDRESS_SPACE_CONST:
    Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
    Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_const_yes_6432
                                             : NVPTX::cvta_const_yes_64)
                       : NVPTX::cvta_const_yes;
    break;
  case ADDRESS_SPACE_LOCAL:
    Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
    Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_local_yes_6432
                                             : NVPTX::cvta_local_yes_64)
                       : NVPTX::cvta_local_yes;
    break;
  }
  ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
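For reference, a hedged sketch of what the new *_yes_6432 opcodes selected above expand to for a shared-to-generic cast (PTX shown in comments; it mirrors the NG_TO_G multiclass and the conv2 test further down, and the function name is illustrative):

define i32 @shared_to_generic(i32 addrspace(3)* %p) {
  ; With --nvptx-short-ptr on nvptx64, %p is a 32-bit value, so the cast emits:
  ;   cvt.u64.u32     %tmp, %src;      (widen the 32-bit shared pointer)
  ;   cvta.shared.u64 %result, %tmp;   (then convert to a 64-bit generic pointer)
  %g = addrspacecast i32 addrspace(3)* %p to i32*
  %v = load i32, i32* %g
  ret i32 %v
}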
@@ -769,16 +778,19 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
                       : NVPTX::cvta_to_global_yes;
    break;
  case ADDRESS_SPACE_SHARED:
    Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
    Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_shared_yes_3264
                                             : NVPTX::cvta_to_shared_yes_64)
                       : NVPTX::cvta_to_shared_yes;
    break;
  case ADDRESS_SPACE_CONST:
    Opc =
        TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
    Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_const_yes_3264
                                             : NVPTX::cvta_to_const_yes_64)
                       : NVPTX::cvta_to_const_yes;
    break;
  case ADDRESS_SPACE_LOCAL:
    Opc =
        TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
    Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_local_yes_3264
                                             : NVPTX::cvta_to_local_yes_64)
                       : NVPTX::cvta_to_local_yes;
    break;
  case ADDRESS_SPACE_PARAM:
    Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
@@ -834,18 +846,20 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
    return false;

  // Address Space Setting
  unsigned int codeAddrSpace = getCodeAddrSpace(LD);

  if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) {
  unsigned int CodeAddrSpace = getCodeAddrSpace(LD);
  if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace, MF)) {
    return tryLDGLDU(N);
  }

  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(LD->getAddressSpace());

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool isVolatile = LD->isVolatile();
  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    isVolatile = false;

  // Type Setting: fromType + fromTypeWidth
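The substantive change in tryLoad: addressing is now keyed on the pointer width of the load's own address space (PointerSize, queried from the DataLayout) rather than the module-wide TM.is64Bit(). A hedged sketch of the observable effect (register classes per the ld-addrspace.ll checks updated below):

define i32 @ld_local(i32 addrspace(5)* %p) {
  ; nvptx64, default:           ld.local.u32 %r{{.*}}, [%rd{{.*}}]  (64-bit base)
  ; nvptx64, --nvptx-short-ptr: ld.local.u32 %r{{.*}}, [%r{{.*}}]   (32-bit base)
  %v = load i32, i32 addrspace(5)* %p
  ret i32 %v
}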
@@ -892,27 +906,27 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
                             NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), Addr, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
                                     MVT::Other, Ops);
  } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
                          : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
  } else if (PointerSize == 64 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
                               : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
    Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
                             NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
                             NVPTX::LD_f16_asi, NVPTX::LD_f16x2_asi,
                             NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
                                     MVT::Other, Ops);
  } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
                          : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
    if (TM.is64Bit())
  } else if (PointerSize == 64 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
                               : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
    if (PointerSize == 64)
      Opcode = pickOpcodeForVT(
          TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
          NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64, NVPTX::LD_f16_ari_64,
@@ -924,13 +938,13 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
                               NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
                                     MVT::Other, Ops);
  } else {
    if (TM.is64Bit())
    if (PointerSize == 64)
      Opcode = pickOpcodeForVT(
          TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
          NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64, NVPTX::LD_f16_areg_64,
@@ -943,7 +957,7 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
                               NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), N1, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
@@ -977,11 +991,13 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {

  // Address Space Setting
  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);

  if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
    return tryLDGLDU(N);
  }

  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool IsVolatile = MemSD->isVolatile();
@@ -1064,8 +1080,9 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Addr, Chain };
    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
  } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
                          : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
  } else if (PointerSize == 64
                 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
                 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
    switch (N->getOpcode()) {
    default:
      return false;
@@ -1090,9 +1107,10 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
  } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
                          : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
    if (TM.is64Bit()) {
  } else if (PointerSize == 64
                 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
                 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
      default:
        return false;
@@ -1140,7 +1158,7 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {

    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
  } else {
    if (TM.is64Bit()) {
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
      default:
        return false;
@@ -1685,14 +1703,16 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
    return false;

  // Address Space Setting
  unsigned int codeAddrSpace = getCodeAddrSpace(ST);
  unsigned int CodeAddrSpace = getCodeAddrSpace(ST);
  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(ST->getAddressSpace());

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool isVolatile = ST->isVolatile();
  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    isVolatile = false;

  // Vector Setting
@@ -1735,12 +1755,12 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
    if (!Opcode)
      return false;
    SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
                      getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
                      getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl),
                      getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
                      Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
  } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
                          : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
  } else if (PointerSize == 64 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
                               : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
    Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
                             NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
                             NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi,
@@ -1748,13 +1768,13 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
    if (!Opcode)
      return false;
    SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
                      getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
                      getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl),
                      getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
                      Offset, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
  } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
                          : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
    if (TM.is64Bit())
  } else if (PointerSize == 64 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
                               : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
    if (PointerSize == 64)
      Opcode = pickOpcodeForVT(
          SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
          NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64, NVPTX::ST_f16_ari_64,
@@ -1768,12 +1788,12 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
      return false;

    SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
                      getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
                      getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl),
                      getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
                      Offset, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
  } else {
    if (TM.is64Bit())
    if (PointerSize == 64)
      Opcode =
          pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
                          NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
@@ -1787,7 +1807,7 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
    if (!Opcode)
      return false;
    SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
                      getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
                      getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl),
                      getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
                      Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
@@ -1816,11 +1836,12 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {

  // Address Space Setting
  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);

  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
    report_fatal_error("Cannot store to pointer that points to constant "
                       "memory space");
  }
  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());

  // Volatile Setting
  // - .volatile is only available for .global and .shared
@@ -1901,8 +1922,8 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
      break;
    }
    StOps.push_back(Addr);
  } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
                          : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
  } else if (PointerSize == 64 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
                               : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
    switch (N->getOpcode()) {
    default:
      return false;
@@ -1923,9 +1944,9 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
    }
    StOps.push_back(Base);
    StOps.push_back(Offset);
  } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
                          : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
    if (TM.is64Bit()) {
  } else if (PointerSize == 64 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
                               : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
      default:
        return false;
@@ -1968,7 +1989,7 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
    StOps.push_back(Base);
    StOps.push_back(Offset);
  } else {
    if (TM.is64Bit()) {
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
      default:
        return false;
@@ -35,6 +35,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
  bool useF32FTZ() const;
  bool allowFMA() const;
  bool allowUnsafeFPMath() const;
  bool useShortPointers() const;

public:
  explicit NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
@@ -1233,9 +1233,9 @@ SDValue NVPTXTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
SDValue
NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  auto PtrVT = getPointerTy(DAG.getDataLayout());
  Op = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
  const GlobalAddressSDNode *GAN = cast<GlobalAddressSDNode>(Op);
  auto PtrVT = getPointerTy(DAG.getDataLayout(), GAN->getAddressSpace());
  Op = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, PtrVT);
  return DAG.getNode(NVPTXISD::Wrapper, dl, PtrVT, Op);
}
@@ -147,6 +147,7 @@ def hasPTX61 : Predicate<"Subtarget->getPTXVersion() >= 61">;
def hasSM30 : Predicate<"Subtarget->getSmVersion() >= 30">;
def hasSM70 : Predicate<"Subtarget->getSmVersion() >= 70">;

def useShortPtr : Predicate<"useShortPointers()">;
def useFP16Math: Predicate<"Subtarget->allowFP16Math()">;

//===----------------------------------------------------------------------===//
@@ -1937,6 +1937,12 @@ multiclass NG_TO_G<string Str, Intrinsic Intrin> {
  def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
           !strconcat("cvta.", Str, ".u64 \t$result, $src;"),
           [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  def _yes_6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
           "{{ .reg .b64 %tmp;\n\t"
           #" cvt.u64.u32 \t%tmp, $src;\n\t"
           #" cvta." # Str # ".u64 \t$result, %tmp; }}",
           [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
           Requires<[useShortPtr]>;
}

multiclass G_TO_NG<string Str, Intrinsic Intrin> {
@@ -1946,6 +1952,12 @@ multiclass G_TO_NG<string Str, Intrinsic Intrin> {
  def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
           !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"),
           [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  def _yes_3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
           "{{ .reg .b64 %tmp;\n\t"
           #" cvta.to." # Str # ".u64 \t%tmp, $src;\n\t"
           #" cvt.u32.u64 \t$result, %tmp; }}",
           [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
           Requires<[useShortPtr]>;
}

defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
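Conversely, the new *_yes_3264 variants in G_TO_NG perform the generic-to-specific conversion in 64 bits and then truncate the result to 32 bits. A hedged IR sketch of the pattern they match (it mirrors the conv6 test below; the function name is illustrative):

define i32 @generic_to_shared(i32* %p) {
  ; With --nvptx-short-ptr the cast lowers to:
  ;   cvta.to.shared.u64 %tmp, %src;
  ;   cvt.u32.u64        %result, %tmp;
  %s = addrspacecast i32* %p to i32 addrspace(3)*
  %v = load i32, i32 addrspace(3)* %s
  ret i32 %v
}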
@@ -79,7 +79,6 @@ public:
  bool hasImageHandles() const;
  bool hasFP16Math() const { return SmVersion >= 53; }
  bool allowFP16Math() const;

  unsigned int getSmVersion() const { return SmVersion; }
  std::string getTargetName() const { return TargetName; }
@@ -52,6 +52,12 @@ static cl::opt<bool> DisableRequireStructuredCFG(
             "unexpected regressions happen."),
    cl::init(false), cl::Hidden);

static cl::opt<bool> UseShortPointersOpt(
    "nvptx-short-ptr",
    cl::desc(
        "Use 32-bit pointers for accessing const/local/shared address spaces."),
    cl::init(false), cl::Hidden);

namespace llvm {

void initializeNVVMIntrRangePass(PassRegistry&);
@@ -83,11 +89,13 @@ extern "C" void LLVMInitializeNVPTXTarget() {
  initializeNVPTXLowerAggrCopiesPass(PR);
}

static std::string computeDataLayout(bool is64Bit) {
static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
  std::string Ret = "e";

  if (!is64Bit)
    Ret += "-p:32:32";
  else if (UseShortPointers)
    Ret += "-p3:32:32-p4:32:32-p5:32:32";

  Ret += "-i64:64-i128:128-v16:16-v32:32-n16:32:64";
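For reference, the data-layout strings this function yields on nvptx64, assembled from the fragment above (assuming no further components are appended outside this hunk), shown as module-level IR:

; default:
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
; with --nvptx-short-ptr (p3 = shared, p4 = const, p5 = local):
target datalayout = "e-p3:32:32-p4:32:32-p5:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"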
@@ -108,9 +116,11 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
                                       CodeGenOpt::Level OL, bool is64bit)
    // The pic relocation model is used regardless of what the client has
    // specified, as it is the only relocation model currently supported.
    : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options,
                        Reloc::PIC_, getEffectiveCodeModel(CM), OL),
      is64bit(is64bit), TLOF(llvm::make_unique<NVPTXTargetObjectFile>()),
    : LLVMTargetMachine(T, computeDataLayout(is64bit, UseShortPointersOpt), TT,
                        CPU, FS, Options, Reloc::PIC_,
                        getEffectiveCodeModel(CM), OL),
      is64bit(is64bit), UseShortPointers(UseShortPointersOpt),
      TLOF(llvm::make_unique<NVPTXTargetObjectFile>()),
      Subtarget(TT, CPU, FS, *this) {
  if (TT.getOS() == Triple::NVCL)
    drvInterface = NVPTX::NVCL;
@@ -26,6 +26,8 @@ namespace llvm {
///
class NVPTXTargetMachine : public LLVMTargetMachine {
  bool is64bit;
  // Use 32-bit pointers for accessing const/local/shared AS.
  bool UseShortPointers;
  std::unique_ptr<TargetLoweringObjectFile> TLOF;
  NVPTX::DrvInterface drvInterface;
  NVPTXSubtarget Subtarget;
@@ -45,6 +47,7 @@ public:
  }
  const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
  bool is64Bit() const { return is64bit; }
  bool useShortPointers() const { return UseShortPointers; }
  NVPTX::DrvInterface getDrvInterface() const { return drvInterface; }
  ManagedStringPool *getManagedStrPool() const {
    return const_cast<ManagedStringPool *>(&ManagedStrPool);
@@ -1,97 +1,96 @@
; RUN: llc -O0 < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -check-prefix=PTX32
; RUN: llc -O0 < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s -check-prefix=PTX64
; RUN: llc -O0 < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -check-prefixes=ALL,CLS32,G32
; RUN: llc -O0 < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s -check-prefixes=ALL,NOPTRCONV,CLS64,G64
; RUN: llc -O0 < %s -march=nvptx64 -mcpu=sm_20 --nvptx-short-ptr | FileCheck %s -check-prefixes=ALL,PTRCONV,CLS64,G64

; ALL-LABEL: conv1
define i32 @conv1(i32 addrspace(1)* %ptr) {
; PTX32: conv1
; PTX32: cvta.global.u32
; PTX32: ld.u32
; PTX64: conv1
; PTX64: cvta.global.u64
; PTX64: ld.u32
; G32: cvta.global.u32
; ALL-NOT: cvt.u64.u32
; G64: cvta.global.u64
; ALL: ld.u32
  %genptr = addrspacecast i32 addrspace(1)* %ptr to i32*
  %val = load i32, i32* %genptr
  ret i32 %val
}

; ALL-LABEL: conv2
define i32 @conv2(i32 addrspace(3)* %ptr) {
; PTX32: conv2
; PTX32: cvta.shared.u32
; PTX32: ld.u32
; PTX64: conv2
; PTX64: cvta.shared.u64
; PTX64: ld.u32
; CLS32: cvta.shared.u32
; PTRCONV: cvt.u64.u32
; NOPTRCONV-NOT: cvt.u64.u32
; CLS64: cvta.shared.u64
; ALL: ld.u32
  %genptr = addrspacecast i32 addrspace(3)* %ptr to i32*
  %val = load i32, i32* %genptr
  ret i32 %val
}

; ALL-LABEL: conv3
define i32 @conv3(i32 addrspace(4)* %ptr) {
; PTX32: conv3
; PTX32: cvta.const.u32
; PTX32: ld.u32
; PTX64: conv3
; PTX64: cvta.const.u64
; PTX64: ld.u32
; CLS32: cvta.const.u32
; PTRCONV: cvt.u64.u32
; NOPTRCONV-NOT: cvt.u64.u32
; CLS64: cvta.const.u64
; ALL: ld.u32
  %genptr = addrspacecast i32 addrspace(4)* %ptr to i32*
  %val = load i32, i32* %genptr
  ret i32 %val
}

; ALL-LABEL: conv4
define i32 @conv4(i32 addrspace(5)* %ptr) {
; PTX32: conv4
; PTX32: cvta.local.u32
; PTX32: ld.u32
; PTX64: conv4
; PTX64: cvta.local.u64
; PTX64: ld.u32
; CLS32: cvta.local.u32
; PTRCONV: cvt.u64.u32
; NOPTRCONV-NOT: cvt.u64.u32
; CLS64: cvta.local.u64
; ALL: ld.u32
  %genptr = addrspacecast i32 addrspace(5)* %ptr to i32*
  %val = load i32, i32* %genptr
  ret i32 %val
}

; ALL-LABEL: conv5
define i32 @conv5(i32* %ptr) {
; PTX32: conv5
; PTX32: cvta.to.global.u32
; PTX32: ld.global.u32
; PTX64: conv5
; PTX64: cvta.to.global.u64
; PTX64: ld.global.u32
; CLS32: cvta.to.global.u32
; ALL-NOT: cvt.u64.u32
; CLS64: cvta.to.global.u64
; ALL: ld.global.u32
  %specptr = addrspacecast i32* %ptr to i32 addrspace(1)*
  %val = load i32, i32 addrspace(1)* %specptr
  ret i32 %val
}

; ALL-LABEL: conv6
define i32 @conv6(i32* %ptr) {
; PTX32: conv6
; PTX32: cvta.to.shared.u32
; PTX32: ld.shared.u32
; PTX64: conv6
; PTX64: cvta.to.shared.u64
; PTX64: ld.shared.u32
; CLS32: cvta.to.shared.u32
; CLS64: cvta.to.shared.u64
; PTRCONV: cvt.u32.u64
; NOPTRCONV-NOT: cvt.u32.u64
; ALL: ld.shared.u32
  %specptr = addrspacecast i32* %ptr to i32 addrspace(3)*
  %val = load i32, i32 addrspace(3)* %specptr
  ret i32 %val
}

; ALL-LABEL: conv7
define i32 @conv7(i32* %ptr) {
; PTX32: conv7
; PTX32: cvta.to.const.u32
; PTX32: ld.const.u32
; PTX64: conv7
; PTX64: cvta.to.const.u64
; PTX64: ld.const.u32
; CLS32: cvta.to.const.u32
; CLS64: cvta.to.const.u64
; PTRCONV: cvt.u32.u64
; NOPTRCONV-NOT: cvt.u32.u64
; ALL: ld.const.u32
  %specptr = addrspacecast i32* %ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %specptr
  ret i32 %val
}

; ALL-LABEL: conv8
define i32 @conv8(i32* %ptr) {
; PTX32: conv8
; PTX32: cvta.to.local.u32
; PTX32: ld.local.u32
; PTX64: conv8
; PTX64: cvta.to.local.u64
; PTX64: ld.local.u32
; CLS32: cvta.to.local.u32
; CLS64: cvta.to.local.u64
; PTRCONV: cvt.u32.u64
; NOPTRCONV-NOT: cvt.u32.u64
; ALL: ld.local.u32
  %specptr = addrspacecast i32* %ptr to i32 addrspace(5)*
  %val = load i32, i32 addrspace(5)* %specptr
  ret i32 %val
}
@@ -1,171 +1,160 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefixes=ALL,G32,LS32
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefixes=ALL,G64,LS64
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 --nvptx-short-ptr | FileCheck %s --check-prefixes=G64,LS32

;; i8
define i8 @ld_global_i8(i8 addrspace(1)* %ptr) {
; PTX32: ld.global.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
; ALL-LABEL: ld_global_i8
; G32: ld.global.u8 %{{.*}}, [%r{{[0-9]+}}]
; G64: ld.global.u8 %{{.*}}, [%rd{{[0-9]+}}]
; ALL: ret
  %a = load i8, i8 addrspace(1)* %ptr
  ret i8 %a
}

define i8 @ld_shared_i8(i8 addrspace(3)* %ptr) {
; PTX32: ld.shared.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
; ALL-LABEL: ld_shared_i8
; LS32: ld.shared.u8 %{{.*}}, [%r{{[0-9]+}}]
; LS64: ld.shared.u8 %{{.*}}, [%rd{{[0-9]+}}]
; ALL: ret
  %a = load i8, i8 addrspace(3)* %ptr
  ret i8 %a
}

define i8 @ld_local_i8(i8 addrspace(5)* %ptr) {
; PTX32: ld.local.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
; ALL-LABEL: ld_local_i8
; LS32: ld.local.u8 %{{.*}}, [%r{{[0-9]+}}]
; LS64: ld.local.u8 %{{.*}}, [%rd{{[0-9]+}}]
; ALL: ret
  %a = load i8, i8 addrspace(5)* %ptr
  ret i8 %a
}

;; i16
define i16 @ld_global_i16(i16 addrspace(1)* %ptr) {
; PTX32: ld.global.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
; ALL-LABEL: ld_global_i16
; G32: ld.global.u16 %{{.*}}, [%r{{[0-9]+}}]
; G64: ld.global.u16 %{{.*}}, [%rd{{[0-9]+}}]
; ALL: ret
  %a = load i16, i16 addrspace(1)* %ptr
  ret i16 %a
}

define i16 @ld_shared_i16(i16 addrspace(3)* %ptr) {
; PTX32: ld.shared.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
; ALL-LABEL: ld_shared_i16
; LS32: ld.shared.u16 %{{.*}}, [%r{{[0-9]+}}]
; LS64: ld.shared.u16 %{{.*}}, [%rd{{[0-9]+}}]
; ALL: ret
  %a = load i16, i16 addrspace(3)* %ptr
  ret i16 %a
}

define i16 @ld_local_i16(i16 addrspace(5)* %ptr) {
; PTX32: ld.local.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
; ALL-LABEL: ld_local_i16
; LS32: ld.local.u16 %{{.*}}, [%r{{[0-9]+}}]
; LS64: ld.local.u16 %{{.*}}, [%rd{{[0-9]+}}]
; ALL: ret
  %a = load i16, i16 addrspace(5)* %ptr
  ret i16 %a
}

;; i32
define i32 @ld_global_i32(i32 addrspace(1)* %ptr) {
; PTX32: ld.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
; ALL-LABEL: ld_global_i32
; G32: ld.global.u32 %{{.*}}, [%r{{[0-9]+}}]
; G64: ld.global.u32 %{{.*}}, [%rd{{[0-9]+}}]
; ALL: ret
  %a = load i32, i32 addrspace(1)* %ptr
  ret i32 %a
}

define i32 @ld_shared_i32(i32 addrspace(3)* %ptr) {
; PTX32: ld.shared.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; ALL-LABEL: ld_shared_i32
; LS32: ld.shared.u32 %{{.*}}, [%r{{[0-9]+}}]
; LS64: ld.shared.u32 %{{.*}}, [%rd{{[0-9]+}}]
; PTX64: ret
  %a = load i32, i32 addrspace(3)* %ptr
  ret i32 %a
}

define i32 @ld_local_i32(i32 addrspace(5)* %ptr) {
; PTX32: ld.local.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
; ALL-LABEL: ld_local_i32
; LS32: ld.local.u32 %{{.*}}, [%r{{[0-9]+}}]
; LS64: ld.local.u32 %{{.*}}, [%rd{{[0-9]+}}]
; ALL: ret
  %a = load i32, i32 addrspace(5)* %ptr
  ret i32 %a
}

;; i64
define i64 @ld_global_i64(i64 addrspace(1)* %ptr) {
; PTX32: ld.global.u64 %rd{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
; ALL-LABEL: ld_global_i64
; G32: ld.global.u64 %{{.*}}, [%r{{[0-9]+}}]
; G64: ld.global.u64 %{{.*}}, [%rd{{[0-9]+}}]
; ALL: ret
  %a = load i64, i64 addrspace(1)* %ptr
  ret i64 %a
}

define i64 @ld_shared_i64(i64 addrspace(3)* %ptr) {
; PTX32: ld.shared.u64 %rd{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
; ALL-LABEL: ld_shared_i64
; LS32: ld.shared.u64 %{{.*}}, [%r{{[0-9]+}}]
; LS64: ld.shared.u64 %{{.*}}, [%rd{{[0-9]+}}]
; ALL: ret
  %a = load i64, i64 addrspace(3)* %ptr
  ret i64 %a
}

define i64 @ld_local_i64(i64 addrspace(5)* %ptr) {
; PTX32: ld.local.u64 %rd{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
; ALL-LABEL: ld_local_i64
; LS32: ld.local.u64 %{{.*}}, [%r{{[0-9]+}}]
; LS64: ld.local.u64 %{{.*}}, [%rd{{[0-9]+}}]
; ALL: ret
  %a = load i64, i64 addrspace(5)* %ptr
  ret i64 %a
}

;; f32
define float @ld_global_f32(float addrspace(1)* %ptr) {
; PTX32: ld.global.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
; ALL-LABEL: ld_global_f32
; G32: ld.global.f32 %{{.*}}, [%r{{[0-9]+}}]
; G64: ld.global.f32 %{{.*}}, [%rd{{[0-9]+}}]
; ALL: ret
  %a = load float, float addrspace(1)* %ptr
  ret float %a
}

define float @ld_shared_f32(float addrspace(3)* %ptr) {
; PTX32: ld.shared.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
; ALL-LABEL: ld_shared_f32
; LS32: ld.shared.f32 %{{.*}}, [%r{{[0-9]+}}]
; LS64: ld.shared.f32 %{{.*}}, [%rd{{[0-9]+}}]
; ALL: ret
  %a = load float, float addrspace(3)* %ptr
  ret float %a
}

define float @ld_local_f32(float addrspace(5)* %ptr) {
; PTX32: ld.local.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
; ALL-LABEL: ld_local_f32
; LS32: ld.local.f32 %{{.*}}, [%r{{[0-9]+}}]
; LS64: ld.local.f32 %{{.*}}, [%rd{{[0-9]+}}]
; ALL: ret
  %a = load float, float addrspace(5)* %ptr
  ret float %a
}

;; f64
define double @ld_global_f64(double addrspace(1)* %ptr) {
; PTX32: ld.global.f64 %fd{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
; ALL-LABEL: ld_global_f64
; G32: ld.global.f64 %{{.*}}, [%r{{[0-9]+}}]
; G64: ld.global.f64 %{{.*}}, [%rd{{[0-9]+}}]
; ALL: ret
  %a = load double, double addrspace(1)* %ptr
  ret double %a
}

define double @ld_shared_f64(double addrspace(3)* %ptr) {
; PTX32: ld.shared.f64 %fd{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
; ALL-LABEL: ld_shared_f64
; LS32: ld.shared.f64 %{{.*}}, [%r{{[0-9]+}}]
; LS64: ld.shared.f64 %{{.*}}, [%rd{{[0-9]+}}]
; ALL: ret
  %a = load double, double addrspace(3)* %ptr
  ret double %a
}

define double @ld_local_f64(double addrspace(5)* %ptr) {
; PTX32: ld.local.f64 %fd{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
; ALL-LABEL: ld_local_f64
; LS32: ld.local.f64 %{{.*}}, [%r{{[0-9]+}}]
; LS64: ld.local.f64 %{{.*}}, [%rd{{[0-9]+}}]
; ALL: ret
  %a = load double, double addrspace(5)* %ptr
  ret double %a
}
@@ -1,177 +1,160 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefixes=ALL,G32,LS32
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefixes=ALL,G64,LS64
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 --nvptx-short-ptr | FileCheck %s --check-prefixes=G64,LS32

;; i8

; ALL-LABEL: st_global_i8
define void @st_global_i8(i8 addrspace(1)* %ptr, i8 %a) {
; PTX32: st.global.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}}
; PTX32: ret
; PTX64: st.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
; PTX64: ret
; G32: st.global.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}}
; G64: st.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
; ALL: ret
  store i8 %a, i8 addrspace(1)* %ptr
  ret void
}

; ALL-LABEL: st_shared_i8
define void @st_shared_i8(i8 addrspace(3)* %ptr, i8 %a) {
; PTX32: st.shared.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}}
; PTX32: ret
; PTX64: st.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
; PTX64: ret
; LS32: st.shared.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}}
; LS64: st.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
; ALL: ret
  store i8 %a, i8 addrspace(3)* %ptr
  ret void
}

; ALL-LABEL: st_local_i8
define void @st_local_i8(i8 addrspace(5)* %ptr, i8 %a) {
; PTX32: st.local.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}}
; PTX32: ret
; PTX64: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
; PTX64: ret
; LS32: st.local.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}}
; LS64: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
; ALL: ret
  store i8 %a, i8 addrspace(5)* %ptr
  ret void
}

;; i16

; ALL-LABEL: st_global_i16
define void @st_global_i16(i16 addrspace(1)* %ptr, i16 %a) {
; PTX32: st.global.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
; PTX32: ret
; PTX64: st.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
; PTX64: ret
; G32: st.global.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
; G64: st.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
; ALL: ret
  store i16 %a, i16 addrspace(1)* %ptr
  ret void
}

; ALL-LABEL: st_shared_i16
define void @st_shared_i16(i16 addrspace(3)* %ptr, i16 %a) {
; PTX32: st.shared.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
; PTX32: ret
; PTX64: st.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
; PTX64: ret
; LS32: st.shared.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
; LS64: st.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
; ALL: ret
  store i16 %a, i16 addrspace(3)* %ptr
  ret void
}

; ALL-LABEL: st_local_i16
define void @st_local_i16(i16 addrspace(5)* %ptr, i16 %a) {
; PTX32: st.local.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
; PTX32: ret
; PTX64: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
; PTX64: ret
; LS32: st.local.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
; LS64: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
; ALL: ret
  store i16 %a, i16 addrspace(5)* %ptr
  ret void
}

;; i32

; ALL-LABEL: st_global_i32
define void @st_global_i32(i32 addrspace(1)* %ptr, i32 %a) {
; PTX32: st.global.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
; PTX32: ret
; PTX64: st.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
; PTX64: ret
; G32: st.global.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
; G64: st.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
; ALL: ret
  store i32 %a, i32 addrspace(1)* %ptr
  ret void
}

; ALL-LABEL: st_shared_i32
define void @st_shared_i32(i32 addrspace(3)* %ptr, i32 %a) {
; PTX32: st.shared.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
; PTX32: ret
; PTX64: st.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
; LS32: st.shared.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
; LS64: st.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
; PTX64: ret
  store i32 %a, i32 addrspace(3)* %ptr
  ret void
}

; ALL-LABEL: st_local_i32
define void @st_local_i32(i32 addrspace(5)* %ptr, i32 %a) {
; PTX32: st.local.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
; PTX32: ret
; PTX64: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
; PTX64: ret
; LS32: st.local.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
; LS64: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
; ALL: ret
  store i32 %a, i32 addrspace(5)* %ptr
  ret void
}

;; i64

; ALL-LABEL: st_global_i64
define void @st_global_i64(i64 addrspace(1)* %ptr, i64 %a) {
; PTX32: st.global.u64 [%r{{[0-9]+}}], %rd{{[0-9]+}}
; PTX32: ret
; PTX64: st.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
; PTX64: ret
; G32: st.global.u64 [%r{{[0-9]+}}], %rd{{[0-9]+}}
; G64: st.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
; ALL: ret
  store i64 %a, i64 addrspace(1)* %ptr
  ret void
}

; ALL-LABEL: st_shared_i64
define void @st_shared_i64(i64 addrspace(3)* %ptr, i64 %a) {
; PTX32: st.shared.u64 [%r{{[0-9]+}}], %rd{{[0-9]+}}
; PTX32: ret
; PTX64: st.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
; PTX64: ret
; LS32: st.shared.u64 [%r{{[0-9]+}}], %rd{{[0-9]+}}
; LS64: st.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
; ALL: ret
  store i64 %a, i64 addrspace(3)* %ptr
  ret void
}

; ALL-LABEL: st_local_i64
define void @st_local_i64(i64 addrspace(5)* %ptr, i64 %a) {
; PTX32: st.local.u64 [%r{{[0-9]+}}], %rd{{[0-9]+}}
; PTX32: ret
; PTX64: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
; PTX64: ret
; LS32: st.local.u64 [%r{{[0-9]+}}], %rd{{[0-9]+}}
; LS64: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
; ALL: ret
  store i64 %a, i64 addrspace(5)* %ptr
  ret void
}

;; f32

; ALL-LABEL: st_global_f32
define void @st_global_f32(float addrspace(1)* %ptr, float %a) {
; PTX32: st.global.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
; PTX32: ret
; PTX64: st.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
; PTX64: ret
; G32: st.global.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
; G64: st.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
; ALL: ret
  store float %a, float addrspace(1)* %ptr
  ret void
}

; ALL-LABEL: st_shared_f32
define void @st_shared_f32(float addrspace(3)* %ptr, float %a) {
; PTX32: st.shared.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
; PTX32: ret
; PTX64: st.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
; PTX64: ret
; LS32: st.shared.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
; LS64: st.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
; ALL: ret
  store float %a, float addrspace(3)* %ptr
  ret void
}

; ALL-LABEL: st_local_f32
define void @st_local_f32(float addrspace(5)* %ptr, float %a) {
; PTX32: st.local.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
; PTX32: ret
; PTX64: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
; PTX64: ret
; LS32: st.local.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
; LS64: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
; ALL: ret
  store float %a, float addrspace(5)* %ptr
  ret void
}

;; f64

; ALL-LABEL: st_global_f64
define void @st_global_f64(double addrspace(1)* %ptr, double %a) {
; PTX32: st.global.f64 [%r{{[0-9]+}}], %fd{{[0-9]+}}
; PTX32: ret
; PTX64: st.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
; PTX64: ret
; G32: st.global.f64 [%r{{[0-9]+}}], %fd{{[0-9]+}}
; G64: st.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
; ALL: ret
  store double %a, double addrspace(1)* %ptr
  ret void
}

; ALL-LABEL: st_shared_f64
define void @st_shared_f64(double addrspace(3)* %ptr, double %a) {
; PTX32: st.shared.f64 [%r{{[0-9]+}}], %fd{{[0-9]+}}
; PTX32: ret
; PTX64: st.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
; PTX64: ret
; LS32: st.shared.f64 [%r{{[0-9]+}}], %fd{{[0-9]+}}
; LS64: st.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
; ALL: ret
  store double %a, double addrspace(3)* %ptr
  ret void
}

; ALL-LABEL: st_local_f64
define void @st_local_f64(double addrspace(5)* %ptr, double %a) {
; PTX32: st.local.f64 [%r{{[0-9]+}}], %fd{{[0-9]+}}
; PTX32: ret
; PTX64: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
; PTX64: ret
; LS32: st.local.f64 [%r{{[0-9]+}}], %fd{{[0-9]+}}
; LS64: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
; ALL: ret
  store double %a, double addrspace(5)* %ptr
  ret void
}