[NVPTX] Calling conventions fix
Fix ABI handling for functions returning bool: use st.param.b32 to return the value, and use ld.param.b32 in the caller to load the return value.

llvm-svn: 185177
commit f17855a9dc (parent 6feb5e8392)
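
In IR terms, a function returning i1 now gets a 32-bit return parameter. The sketch below is illustrative only: the function names are made up, and the expected-PTX comments are assumptions modeled on the new i8-param.ll test added further down in this commit, not CHECK lines taken from the tree.

    ; Hypothetical IR, in the style of the i8-param.ll test below.
    ; Expected (assumed): .visible .func (.param .b32 func_retval0) is_zero
    define i1 @is_zero(i32 %x) {
      %p = icmp eq i32 %x, 0
      ; Expected (assumed): callee returns via st.param.b32 [func_retval0+0]
      ret i1 %p
    }

    define i32 @user(i32 %x) {
      ; Expected (assumed): caller reloads the result via ld.param.b32
      %b = tail call i1 @is_zero(i32 %x)
      %r = zext i1 %b to i32
      ret i32 %r
    }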
@@ -1207,7 +1207,14 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
         sz = 8;
 
       SmallVector<EVT, 4> LoadRetVTs;
-      if (sz < 16) {
+      EVT TheLoadType = VTs[i];
+      if (retTy->isIntegerTy() &&
+          TD->getTypeAllocSizeInBits(retTy) < 32) {
+        // This is for integer types only, and specifically not for
+        // aggregates.
+        LoadRetVTs.push_back(MVT::i32);
+        TheLoadType = MVT::i32;
+      } else if (sz < 16) {
         // If loading i1/i8 result, generate
         //   load i8 (-> i16)
         //   trunc i16 to i1/i8
@@ -1225,7 +1232,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
       SDValue retval = DAG.getMemIntrinsicNode(
           NVPTXISD::LoadParam, dl,
           DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()), &LoadRetOps[0],
-          LoadRetOps.size(), VTs[i], MachinePointerInfo());
+          LoadRetOps.size(), TheLoadType, MachinePointerInfo());
       Chain = retval.getValue(1);
       InFlag = retval.getValue(2);
       SDValue Ret0 = retval.getValue(0);
@@ -1798,7 +1805,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                  SDLoc dl, SelectionDAG &DAG) const {
   MachineFunction &MF = DAG.getMachineFunction();
   const Function *F = MF.getFunction();
-  const Type *RetTy = F->getReturnType();
+  Type *RetTy = F->getReturnType();
   const DataLayout *TD = getDataLayout();
 
   bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
@@ -1806,14 +1813,14 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
   if (!isABI)
     return Chain;
 
-  if (const VectorType *VTy = dyn_cast<const VectorType>(RetTy)) {
+  if (VectorType *VTy = dyn_cast<VectorType>(RetTy)) {
     // If we have a vector type, the OutVals array will be the scalarized
     // components and we have combine them into 1 or more vector stores.
     unsigned NumElts = VTy->getNumElements();
     assert(NumElts == Outs.size() && "Bad scalarization of return value");
 
-    // const_cast can be removed in later LLVM versions
-    EVT EltVT = getValueType(const_cast<Type *>(RetTy)).getVectorElementType();
+    EVT EltVT = getValueType(RetTy).getVectorElementType();
     bool NeedExtend = false;
     if (EltVT.getSizeInBits() < 16)
       NeedExtend = true;
@@ -1923,34 +1930,43 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
     SmallVector<EVT, 16> ValVTs;
-    // const_cast is necessary since we are still using an LLVM version from
-    // before the type system re-write.
-    ComputePTXValueVTs(*this, const_cast<Type *>(RetTy), ValVTs);
+    ComputePTXValueVTs(*this, RetTy, ValVTs);
     assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition");
 
-    unsigned sizesofar = 0;
+    unsigned SizeSoFar = 0;
     for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
       SDValue theVal = OutVals[i];
-      EVT theValType = theVal.getValueType();
+      EVT TheValType = theVal.getValueType();
       unsigned numElems = 1;
-      if (theValType.isVector())
-        numElems = theValType.getVectorNumElements();
+      if (TheValType.isVector())
+        numElems = TheValType.getVectorNumElements();
       for (unsigned j = 0, je = numElems; j != je; ++j) {
-        SDValue tmpval = theVal;
-        if (theValType.isVector())
-          tmpval = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
-                               theValType.getVectorElementType(), tmpval,
+        SDValue TmpVal = theVal;
+        if (TheValType.isVector())
+          TmpVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+                               TheValType.getVectorElementType(), TmpVal,
                                DAG.getIntPtrConstant(j));
-        EVT theStoreType = tmpval.getValueType();
-        if (theStoreType.getSizeInBits() < 8)
-          tmpval = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, tmpval);
-        SDValue Ops[] = { Chain, DAG.getConstant(sizesofar, MVT::i32), tmpval };
+        EVT TheStoreType = ValVTs[i];
+        if (RetTy->isIntegerTy() &&
+            TD->getTypeAllocSizeInBits(RetTy) < 32) {
+          // The following zero-extension is for integer types only, and
+          // specifically not for aggregates.
+          TmpVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, TmpVal);
+          TheStoreType = MVT::i32;
+        }
+        else if (TmpVal.getValueType().getSizeInBits() < 16)
+          TmpVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, TmpVal);
+
+        SDValue Ops[] = { Chain, DAG.getConstant(SizeSoFar, MVT::i32), TmpVal };
         Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
-                                        DAG.getVTList(MVT::Other), &Ops[0], 3,
-                                        ValVTs[i], MachinePointerInfo());
-        if (theValType.isVector())
-          sizesofar +=
-              ValVTs[i].getVectorElementType().getStoreSizeInBits() / 8;
+                                        DAG.getVTList(MVT::Other), &Ops[0],
+                                        3, TheStoreType,
+                                        MachinePointerInfo());
+        if(TheValType.isVector())
+          SizeSoFar +=
+              TheStoreType.getVectorElementType().getStoreSizeInBits() / 8;
        else
-          sizesofar += ValVTs[i].getStoreSizeInBits() / 8;
+          SizeSoFar += TheStoreType.getStoreSizeInBits()/8;
       }
     }
   }
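
Read together, the LowerCall hunks and the LowerReturn hunk above implement a single widening rule for scalar return values. The standalone C++ sketch below restates it; pickRetParamWidth is a hypothetical name used for illustration (it does not exist in the tree), and it collapses the DAG details into a pure size computation.

    #include <cassert>

    // Width in bits of the PTX .param slot used for a scalar return value,
    // per the rule in the hunks above: integer returns whose alloc size is
    // under 32 bits (bool, i8, i16) are promoted to a b32 slot; other values
    // under 16 bits keep the pre-existing widening to 16 bits.
    static unsigned pickRetParamWidth(unsigned allocSizeInBits, bool isInteger) {
      assert(allocSizeInBits > 0 && "expected a sized scalar type");
      if (isInteger && allocSizeInBits < 32)
        return 32; // st.param.b32 in the callee, ld.param.b32 in the caller
      if (allocSizeInBits < 16)
        return 16; // old behavior, kept for the small non-integer cases
      return allocSizeInBits;
    }

    int main() {
      assert(pickRetParamWidth(8, true) == 32);   // bool/i8 (alloc size 8 bits)
      assert(pickRetParamWidth(16, true) == 32);  // i16
      assert(pickRetParamWidth(32, true) == 32);  // i32 is already natural width
      assert(pickRetParamWidth(8, false) == 16);  // non-integer small value
      return 0;
    }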
@@ -195,7 +195,7 @@ define i32 @icmp_sle_i32(i32 %a, i32 %b) {
 
 define i16 @icmp_eq_i16(i16 %a, i16 %b) {
 ; CHECK: setp.eq.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp eq i16 %a, %b
   %ret = zext i1 %cmp to i16
@@ -204,7 +204,7 @@ define i16 @icmp_eq_i16(i16 %a, i16 %b) {
 
 define i16 @icmp_ne_i16(i16 %a, i16 %b) {
 ; CHECK: setp.ne.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp ne i16 %a, %b
   %ret = zext i1 %cmp to i16
@@ -213,7 +213,7 @@ define i16 @icmp_ne_i16(i16 %a, i16 %b) {
 
 define i16 @icmp_ugt_i16(i16 %a, i16 %b) {
 ; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp ugt i16 %a, %b
   %ret = zext i1 %cmp to i16
@@ -222,7 +222,7 @@ define i16 @icmp_ugt_i16(i16 %a, i16 %b) {
 
 define i16 @icmp_uge_i16(i16 %a, i16 %b) {
 ; CHECK: setp.ge.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp uge i16 %a, %b
   %ret = zext i1 %cmp to i16
@@ -231,7 +231,7 @@ define i16 @icmp_uge_i16(i16 %a, i16 %b) {
 
 define i16 @icmp_ult_i16(i16 %a, i16 %b) {
 ; CHECK: setp.lt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp ult i16 %a, %b
   %ret = zext i1 %cmp to i16
@@ -240,7 +240,7 @@ define i16 @icmp_ult_i16(i16 %a, i16 %b) {
 
 define i16 @icmp_ule_i16(i16 %a, i16 %b) {
 ; CHECK: setp.le.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp ule i16 %a, %b
   %ret = zext i1 %cmp to i16
@@ -249,7 +249,7 @@ define i16 @icmp_ule_i16(i16 %a, i16 %b) {
 
 define i16 @icmp_sgt_i16(i16 %a, i16 %b) {
 ; CHECK: setp.gt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp sgt i16 %a, %b
   %ret = zext i1 %cmp to i16
@@ -258,7 +258,7 @@ define i16 @icmp_sgt_i16(i16 %a, i16 %b) {
 
 define i16 @icmp_sge_i16(i16 %a, i16 %b) {
 ; CHECK: setp.ge.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp sge i16 %a, %b
   %ret = zext i1 %cmp to i16
@@ -267,7 +267,7 @@ define i16 @icmp_sge_i16(i16 %a, i16 %b) {
 
 define i16 @icmp_slt_i16(i16 %a, i16 %b) {
 ; CHECK: setp.lt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp slt i16 %a, %b
   %ret = zext i1 %cmp to i16
@@ -276,7 +276,7 @@ define i16 @icmp_slt_i16(i16 %a, i16 %b) {
 
 define i16 @icmp_sle_i16(i16 %a, i16 %b) {
 ; CHECK: setp.le.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp sle i16 %a, %b
   %ret = zext i1 %cmp to i16
@@ -289,7 +289,7 @@ define i16 @icmp_sle_i16(i16 %a, i16 %b) {
 define i8 @icmp_eq_i8(i8 %a, i8 %b) {
 ; Comparison happens in 16-bit
 ; CHECK: setp.eq.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp eq i8 %a, %b
   %ret = zext i1 %cmp to i8
@@ -299,7 +299,7 @@ define i8 @icmp_eq_i8(i8 %a, i8 %b) {
 define i8 @icmp_ne_i8(i8 %a, i8 %b) {
 ; Comparison happens in 16-bit
 ; CHECK: setp.ne.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp ne i8 %a, %b
   %ret = zext i1 %cmp to i8
@@ -309,7 +309,7 @@ define i8 @icmp_ne_i8(i8 %a, i8 %b) {
 define i8 @icmp_ugt_i8(i8 %a, i8 %b) {
 ; Comparison happens in 16-bit
 ; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp ugt i8 %a, %b
   %ret = zext i1 %cmp to i8
@@ -319,7 +319,7 @@ define i8 @icmp_ugt_i8(i8 %a, i8 %b) {
 define i8 @icmp_uge_i8(i8 %a, i8 %b) {
 ; Comparison happens in 16-bit
 ; CHECK: setp.ge.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp uge i8 %a, %b
   %ret = zext i1 %cmp to i8
@@ -329,7 +329,7 @@ define i8 @icmp_uge_i8(i8 %a, i8 %b) {
 define i8 @icmp_ult_i8(i8 %a, i8 %b) {
 ; Comparison happens in 16-bit
 ; CHECK: setp.lt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp ult i8 %a, %b
   %ret = zext i1 %cmp to i8
@@ -339,7 +339,7 @@ define i8 @icmp_ult_i8(i8 %a, i8 %b) {
 define i8 @icmp_ule_i8(i8 %a, i8 %b) {
 ; Comparison happens in 16-bit
 ; CHECK: setp.le.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp ule i8 %a, %b
   %ret = zext i1 %cmp to i8
@@ -349,7 +349,7 @@ define i8 @icmp_ule_i8(i8 %a, i8 %b) {
 define i8 @icmp_sgt_i8(i8 %a, i8 %b) {
 ; Comparison happens in 16-bit
 ; CHECK: setp.gt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp sgt i8 %a, %b
   %ret = zext i1 %cmp to i8
@@ -359,7 +359,7 @@ define i8 @icmp_sgt_i8(i8 %a, i8 %b) {
 define i8 @icmp_sge_i8(i8 %a, i8 %b) {
 ; Comparison happens in 16-bit
 ; CHECK: setp.ge.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp sge i8 %a, %b
   %ret = zext i1 %cmp to i8
@@ -369,7 +369,7 @@ define i8 @icmp_sge_i8(i8 %a, i8 %b) {
 define i8 @icmp_slt_i8(i8 %a, i8 %b) {
 ; Comparison happens in 16-bit
 ; CHECK: setp.lt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp slt i8 %a, %b
   %ret = zext i1 %cmp to i8
@@ -379,7 +379,7 @@ define i8 @icmp_slt_i8(i8 %a, i8 %b) {
 define i8 @icmp_sle_i8(i8 %a, i8 %b) {
 ; Comparison happens in 16-bit
 ; CHECK: setp.le.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp sle i8 %a, %b
   %ret = zext i1 %cmp to i8
@@ -8,16 +8,16 @@
 ; i16
 
 define i16 @cvt_i16_i32(i32 %x) {
-; CHECK: ld.param.u16 %rs[[R0:[0-9]+]], [cvt_i16_i32_param_{{[0-9]+}}]
-; CHECK: st.param.b16 [func_retval{{[0-9]+}}+0], %rs[[R0]]
+; CHECK: ld.param.u16 %r[[R0:[0-9]+]], [cvt_i16_i32_param_{{[0-9]+}}]
+; CHECK: st.param.b32 [func_retval{{[0-9]+}}+0], %r[[R0]]
 ; CHECK: ret
   %a = trunc i32 %x to i16
   ret i16 %a
 }
 
 define i16 @cvt_i16_i64(i64 %x) {
-; CHECK: ld.param.u16 %rs[[R0:[0-9]+]], [cvt_i16_i64_param_{{[0-9]+}}]
-; CHECK: st.param.b16 [func_retval{{[0-9]+}}+0], %rs[[R0]]
+; CHECK: ld.param.u16 %r[[R0:[0-9]+]], [cvt_i16_i64_param_{{[0-9]+}}]
+; CHECK: st.param.b32 [func_retval{{[0-9]+}}+0], %r[[R0]]
 ; CHECK: ret
   %a = trunc i64 %x to i16
   ret i16 %a
test/CodeGen/NVPTX/i8-param.ll (new file, 23 lines)
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+
+; CHECK: .visible .func (.param .b32 func_retval0) callee
+define i8 @callee(i8 %a) {
+; CHECK: ld.param.u8
+  %ret = add i8 %a, 42
+; CHECK: st.param.b32
+  ret i8 %ret
+}
+
+; CHECK: .visible .func caller
+define void @caller(i8* %a) {
+; CHECK: ld.u8
+  %val = load i8* %a
+  %ret = tail call i8 @callee(i8 %val)
+; CHECK: ld.param.b32
+  store i8 %ret, i8* %a
+  ret void
+}
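
The RUN line in the new test doubles as a manual repro recipe; assuming llc and FileCheck from this build are on PATH, something like:

    llc < test/CodeGen/NVPTX/i8-param.ll -march=nvptx -mcpu=sm_20 | FileCheck test/CodeGen/NVPTX/i8-param.ll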
@@ -4,27 +4,27 @@
 
 ;; i8
 define i8 @ld_global_i8(i8 addrspace(1)* %ptr) {
-; PTX32: ld.global.u8 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ld.global.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}]
 ; PTX32: ret
-; PTX64: ld.global.u8 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ld.global.u8 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
 ; PTX64: ret
   %a = load i8 addrspace(1)* %ptr
   ret i8 %a
 }
 
 define i8 @ld_shared_i8(i8 addrspace(3)* %ptr) {
-; PTX32: ld.shared.u8 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ld.shared.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}]
 ; PTX32: ret
-; PTX64: ld.shared.u8 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ld.shared.u8 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
 ; PTX64: ret
   %a = load i8 addrspace(3)* %ptr
   ret i8 %a
 }
 
 define i8 @ld_local_i8(i8 addrspace(5)* %ptr) {
-; PTX32: ld.local.u8 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ld.local.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}]
 ; PTX32: ret
-; PTX64: ld.local.u8 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ld.local.u8 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
 ; PTX64: ret
   %a = load i8 addrspace(5)* %ptr
   ret i8 %a
@@ -32,27 +32,27 @@ define i8 @ld_local_i8(i8 addrspace(5)* %ptr) {
 
 ;; i16
 define i16 @ld_global_i16(i16 addrspace(1)* %ptr) {
-; PTX32: ld.global.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ld.global.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}]
 ; PTX32: ret
-; PTX64: ld.global.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ld.global.u16 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
 ; PTX64: ret
   %a = load i16 addrspace(1)* %ptr
   ret i16 %a
 }
 
 define i16 @ld_shared_i16(i16 addrspace(3)* %ptr) {
-; PTX32: ld.shared.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ld.shared.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}]
 ; PTX32: ret
-; PTX64: ld.shared.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ld.shared.u16 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
 ; PTX64: ret
   %a = load i16 addrspace(3)* %ptr
   ret i16 %a
 }
 
 define i16 @ld_local_i16(i16 addrspace(5)* %ptr) {
-; PTX32: ld.local.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ld.local.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}]
 ; PTX32: ret
-; PTX64: ld.local.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ld.local.u16 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
 ; PTX64: ret
   %a = load i16 addrspace(5)* %ptr
   ret i16 %a
@@ -4,9 +4,9 @@
 
 ;; i8
 define i8 @ld_global_i8(i8 addrspace(0)* %ptr) {
-; PTX32: ld.u8 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ld.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}]
 ; PTX32: ret
-; PTX64: ld.u8 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ld.u8 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
 ; PTX64: ret
   %a = load i8 addrspace(0)* %ptr
   ret i8 %a
@@ -14,9 +14,9 @@ define i8 @ld_global_i8(i8 addrspace(0)* %ptr) {
 
 ;; i16
 define i16 @ld_global_i16(i16 addrspace(0)* %ptr) {
-; PTX32: ld.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ld.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}]
 ; PTX32: ret
-; PTX64: ld.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ld.u16 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
 ; PTX64: ret
   %a = load i16 addrspace(0)* %ptr
   ret i16 %a