1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 19:12:56 +02:00

[NVPTX] Fix vector loads from parameters that span multiple loads, and fix some typos

llvm-svn: 185332
This commit is contained in:
Justin Holewinski 2013-07-01 12:59:01 +00:00
parent 77ba2f5ed9
commit 6284a5cea6
3 changed files with 22 additions and 156 deletions

View File

@ -1997,7 +1997,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
Ops.push_back(Flag);
SDNode *Ret =
CurDAG->getMachineNode(Opc, DL, Node->getVTList(), Ops);
CurDAG->getMachineNode(Opc, DL, VTs, Ops);
return Ret;
}
@ -2270,8 +2270,9 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
}
}
SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
SDNode *Ret =
CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);

View File

@ -340,158 +340,6 @@ NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op);
}
/*
std::string NVPTXTargetLowering::getPrototype(
Type *retTy, const ArgListTy &Args,
const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment) const {
bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
std::stringstream O;
O << "prototype_" << uniqueCallSite << " : .callprototype ";
if (retTy->getTypeID() == Type::VoidTyID)
O << "()";
else {
O << "(";
if (isABI) {
if (retTy->isPrimitiveType() || retTy->isIntegerTy()) {
unsigned size = 0;
if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
size = ITy->getBitWidth();
if (size < 32)
size = 32;
} else {
assert(retTy->isFloatingPointTy() &&
"Floating point type expected here");
size = retTy->getPrimitiveSizeInBits();
}
O << ".param .b" << size << " _";
} else if (isa<PointerType>(retTy))
O << ".param .b" << getPointerTy().getSizeInBits() << " _";
else {
if ((retTy->getTypeID() == Type::StructTyID) ||
isa<VectorType>(retTy)) {
SmallVector<EVT, 16> vtparts;
ComputeValueVTs(*this, retTy, vtparts);
unsigned totalsz = 0;
for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
unsigned elems = 1;
EVT elemtype = vtparts[i];
if (vtparts[i].isVector()) {
elems = vtparts[i].getVectorNumElements();
elemtype = vtparts[i].getVectorElementType();
}
for (unsigned j = 0, je = elems; j != je; ++j) {
unsigned sz = elemtype.getSizeInBits();
if (elemtype.isInteger() && (sz < 8))
sz = 8;
totalsz += sz / 8;
}
}
O << ".param .align " << retAlignment << " .b8 _[" << totalsz << "]";
} else {
assert(false && "Unknown return type");
}
}
} else {
SmallVector<EVT, 16> vtparts;
ComputeValueVTs(*this, retTy, vtparts);
unsigned idx = 0;
for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
unsigned elems = 1;
EVT elemtype = vtparts[i];
if (vtparts[i].isVector()) {
elems = vtparts[i].getVectorNumElements();
elemtype = vtparts[i].getVectorElementType();
}
for (unsigned j = 0, je = elems; j != je; ++j) {
unsigned sz = elemtype.getSizeInBits();
if (elemtype.isInteger() && (sz < 32))
sz = 32;
O << ".reg .b" << sz << " _";
if (j < je - 1)
O << ", ";
++idx;
}
if (i < e - 1)
O << ", ";
}
}
O << ") ";
}
O << "_ (";
bool first = true;
MVT thePointerTy = getPointerTy();
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
const Type *Ty = Args[i].Ty;
if (!first) {
O << ", ";
}
first = false;
if (Outs[i].Flags.isByVal() == false) {
unsigned sz = 0;
if (isa<IntegerType>(Ty)) {
sz = cast<IntegerType>(Ty)->getBitWidth();
if (sz < 32)
sz = 32;
} else if (isa<PointerType>(Ty))
sz = thePointerTy.getSizeInBits();
else
sz = Ty->getPrimitiveSizeInBits();
if (isABI)
O << ".param .b" << sz << " ";
else
O << ".reg .b" << sz << " ";
O << "_";
continue;
}
const PointerType *PTy = dyn_cast<PointerType>(Ty);
assert(PTy && "Param with byval attribute should be a pointer type");
Type *ETy = PTy->getElementType();
if (isABI) {
unsigned align = Outs[i].Flags.getByValAlign();
unsigned sz = getDataLayout()->getTypeAllocSize(ETy);
O << ".param .align " << align << " .b8 ";
O << "_";
O << "[" << sz << "]";
continue;
} else {
SmallVector<EVT, 16> vtparts;
ComputeValueVTs(*this, ETy, vtparts);
for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
unsigned elems = 1;
EVT elemtype = vtparts[i];
if (vtparts[i].isVector()) {
elems = vtparts[i].getVectorNumElements();
elemtype = vtparts[i].getVectorElementType();
}
for (unsigned j = 0, je = elems; j != je; ++j) {
unsigned sz = elemtype.getSizeInBits();
if (elemtype.isInteger() && (sz < 32))
sz = 32;
O << ".reg .b" << sz << " ";
O << "_";
if (j < je - 1)
O << ", ";
}
if (i < e - 1)
O << ", ";
}
continue;
}
}
O << ");";
return O.str();
}*/
std::string
NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
const SmallVectorImpl<ISD::OutputArg> &Outs,
@ -584,7 +432,9 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
OIdx += len - 1;
continue;
}
assert(getValueType(Ty) == Outs[OIdx].VT &&
// i8 types in IR will be i16 types in SDAG
assert((getValueType(Ty) == Outs[OIdx].VT ||
(getValueType(Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&
"type mismatch between callee prototype and arguments");
// scalar type
unsigned sz = 0;
@ -854,6 +704,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Ops.push_back(StoreVal);
}
Ops.push_back(InFlag);
SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, &Ops[0],
Ops.size(), MemVT,
@ -1733,8 +1585,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
InVals.push_back(Elt);
}
Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
InsIdx += VecSize;
}
InsIdx += VecSize;
}
if (NumElts > 0)

View File

@ -0,0 +1,13 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
; @foo takes a single <16 x float> argument and returns it unchanged.
; With 32-bit floats (f32:32:32 in the datalayout above) the argument occupies
; 64 bytes. The expected output below reads it as four 4-element f32 vector
; parameter loads, at byte offsets 0, 16, 32 and 48 from foo_param_0.
define <16 x float> @foo(<16 x float> %a) {
; Make sure we index into vectors properly
; CHECK: ld.param.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [foo_param_0];
; CHECK: ld.param.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [foo_param_0+16];
; CHECK: ld.param.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [foo_param_0+32];
; CHECK: ld.param.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [foo_param_0+48];
  ret <16 x float> %a
}