1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 11:42:57 +01:00

More Neon clean-up: avoid the need for custom-lowering vld/st-lane intrinsics

by creating TargetConstants during instruction selection instead of during
legalization.

llvm-svn: 84042
This commit is contained in:
Bob Wilson 2009-10-13 22:29:24 +00:00
parent 959ec973ad
commit a98883deaa
2 changed files with 24 additions and 90 deletions

View File

@ -1415,6 +1415,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
SDValue MemAddr, MemUpdate, MemOpc;
if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
return NULL;
SDValue Chain = N->getOperand(0);
if (VT.is64BitVector()) {
switch (VT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("unhandled vld2 type");
@ -1424,7 +1425,6 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
case MVT::v2i32: Opc = ARM::VLD2d32; break;
case MVT::v1i64: Opc = ARM::VLD2d64; break;
}
SDValue Chain = N->getOperand(0);
const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain };
return CurDAG->getMachineNode(Opc, dl, VT, VT, MVT::Other, Ops, 4);
}
@ -1437,7 +1437,6 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
case MVT::v4f32: Opc = ARM::VLD2q32; RegVT = MVT::v2f32; break;
case MVT::v4i32: Opc = ARM::VLD2q32; RegVT = MVT::v2i32; break;
}
SDValue Chain = N->getOperand(0);
const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain };
std::vector<EVT> ResTys(4, RegVT);
ResTys.push_back(MVT::Other);
@ -1454,6 +1453,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
SDValue MemAddr, MemUpdate, MemOpc;
if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
return NULL;
SDValue Chain = N->getOperand(0);
if (VT.is64BitVector()) {
switch (VT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("unhandled vld3 type");
@ -1463,7 +1463,6 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
case MVT::v2i32: Opc = ARM::VLD3d32; break;
case MVT::v1i64: Opc = ARM::VLD3d64; break;
}
SDValue Chain = N->getOperand(0);
const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain };
return CurDAG->getMachineNode(Opc, dl, VT, VT, VT, MVT::Other, Ops, 4);
}
@ -1482,7 +1481,6 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
case MVT::v4i32:
Opc = ARM::VLD3q32a; Opc2 = ARM::VLD3q32b; RegVT = MVT::v2i32; break;
}
SDValue Chain = N->getOperand(0);
// Enable writeback to the address register.
MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32);
@ -1512,6 +1510,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
SDValue MemAddr, MemUpdate, MemOpc;
if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
return NULL;
SDValue Chain = N->getOperand(0);
if (VT.is64BitVector()) {
switch (VT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("unhandled vld4 type");
@ -1521,7 +1520,6 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
case MVT::v2i32: Opc = ARM::VLD4d32; break;
case MVT::v1i64: Opc = ARM::VLD4d64; break;
}
SDValue Chain = N->getOperand(0);
const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain };
std::vector<EVT> ResTys(4, VT);
ResTys.push_back(MVT::Other);
@ -1542,7 +1540,6 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
case MVT::v4i32:
Opc = ARM::VLD4q32a; Opc2 = ARM::VLD4q32b; RegVT = MVT::v2i32; break;
}
SDValue Chain = N->getOperand(0);
// Enable writeback to the address register.
MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32);
@ -1574,6 +1571,8 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
SDValue MemAddr, MemUpdate, MemOpc;
if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
return NULL;
SDValue Chain = N->getOperand(0);
unsigned Lane = cast<ConstantSDNode>(N->getOperand(5))->getZExtValue();
if (VT.is64BitVector()) {
switch (VT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("unhandled vld2lane type");
@ -1582,10 +1581,9 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
case MVT::v2f32:
case MVT::v2i32: Opc = ARM::VLD2LNd32; break;
}
SDValue Chain = N->getOperand(0);
const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
N->getOperand(3), N->getOperand(4),
N->getOperand(5), Chain };
getI32Imm(Lane), Chain };
return CurDAG->getMachineNode(Opc, dl, VT, VT, MVT::Other, Ops, 7);
}
// Quad registers are handled by extracting subregs, doing the load,
@ -1610,8 +1608,6 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
RegVT = MVT::v2i32;
break;
}
SDValue Chain = N->getOperand(0);
unsigned Lane = cast<ConstantSDNode>(N->getOperand(5))->getZExtValue();
unsigned NumElts = RegVT.getVectorNumElements();
int SubregIdx = (Lane < NumElts) ? ARM::DSUBREG_0 : ARM::DSUBREG_1;
@ -1641,6 +1637,8 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
SDValue MemAddr, MemUpdate, MemOpc;
if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
return NULL;
SDValue Chain = N->getOperand(0);
unsigned Lane = cast<ConstantSDNode>(N->getOperand(6))->getZExtValue();
if (VT.is64BitVector()) {
switch (VT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("unhandled vld3lane type");
@ -1649,10 +1647,9 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
case MVT::v2f32:
case MVT::v2i32: Opc = ARM::VLD3LNd32; break;
}
SDValue Chain = N->getOperand(0);
const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
N->getOperand(3), N->getOperand(4),
N->getOperand(5), N->getOperand(6), Chain };
N->getOperand(5), getI32Imm(Lane), Chain };
return CurDAG->getMachineNode(Opc, dl, VT, VT, VT, MVT::Other, Ops, 8);
}
// Quad registers are handled by extracting subregs, doing the load,
@ -1677,8 +1674,6 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
RegVT = MVT::v2i32;
break;
}
SDValue Chain = N->getOperand(0);
unsigned Lane = cast<ConstantSDNode>(N->getOperand(6))->getZExtValue();
unsigned NumElts = RegVT.getVectorNumElements();
int SubregIdx = (Lane < NumElts) ? ARM::DSUBREG_0 : ARM::DSUBREG_1;
@ -1714,6 +1709,8 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
SDValue MemAddr, MemUpdate, MemOpc;
if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
return NULL;
SDValue Chain = N->getOperand(0);
unsigned Lane = cast<ConstantSDNode>(N->getOperand(7))->getZExtValue();
if (VT.is64BitVector()) {
switch (VT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("unhandled vld4lane type");
@ -1722,11 +1719,10 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
case MVT::v2f32:
case MVT::v2i32: Opc = ARM::VLD4LNd32; break;
}
SDValue Chain = N->getOperand(0);
const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
N->getOperand(3), N->getOperand(4),
N->getOperand(5), N->getOperand(6),
N->getOperand(7), Chain };
getI32Imm(Lane), Chain };
std::vector<EVT> ResTys(4, VT);
ResTys.push_back(MVT::Other);
return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 9);
@ -1753,8 +1749,6 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
RegVT = MVT::v2i32;
break;
}
SDValue Chain = N->getOperand(0);
unsigned Lane = cast<ConstantSDNode>(N->getOperand(7))->getZExtValue();
unsigned NumElts = RegVT.getVectorNumElements();
int SubregIdx = (Lane < NumElts) ? ARM::DSUBREG_0 : ARM::DSUBREG_1;
@ -1797,6 +1791,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
SDValue MemAddr, MemUpdate, MemOpc;
if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
return NULL;
SDValue Chain = N->getOperand(0);
VT = N->getOperand(3).getValueType();
if (VT.is64BitVector()) {
switch (VT.getSimpleVT().SimpleTy) {
@ -1807,7 +1802,6 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
case MVT::v2i32: Opc = ARM::VST2d32; break;
case MVT::v1i64: Opc = ARM::VST2d64; break;
}
SDValue Chain = N->getOperand(0);
const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
N->getOperand(3), N->getOperand(4), Chain };
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 6);
@ -1821,7 +1815,6 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
case MVT::v4f32: Opc = ARM::VST2q32; RegVT = MVT::v2f32; break;
case MVT::v4i32: Opc = ARM::VST2q32; RegVT = MVT::v2i32; break;
}
SDValue Chain = N->getOperand(0);
SDValue D0 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
N->getOperand(3));
SDValue D1 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
@ -1839,6 +1832,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
SDValue MemAddr, MemUpdate, MemOpc;
if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
return NULL;
SDValue Chain = N->getOperand(0);
VT = N->getOperand(3).getValueType();
if (VT.is64BitVector()) {
switch (VT.getSimpleVT().SimpleTy) {
@ -1849,7 +1843,6 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
case MVT::v2i32: Opc = ARM::VST3d32; break;
case MVT::v1i64: Opc = ARM::VST3d64; break;
}
SDValue Chain = N->getOperand(0);
const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
N->getOperand(3), N->getOperand(4),
N->getOperand(5), Chain };
@ -1870,7 +1863,6 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
case MVT::v4i32:
Opc = ARM::VST3q32a; Opc2 = ARM::VST3q32b; RegVT = MVT::v2i32; break;
}
SDValue Chain = N->getOperand(0);
// Enable writeback to the address register.
MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32);
@ -1904,6 +1896,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
SDValue MemAddr, MemUpdate, MemOpc;
if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
return NULL;
SDValue Chain = N->getOperand(0);
VT = N->getOperand(3).getValueType();
if (VT.is64BitVector()) {
switch (VT.getSimpleVT().SimpleTy) {
@ -1914,7 +1907,6 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
case MVT::v2i32: Opc = ARM::VST4d32; break;
case MVT::v1i64: Opc = ARM::VST4d64; break;
}
SDValue Chain = N->getOperand(0);
const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
N->getOperand(3), N->getOperand(4),
N->getOperand(5), N->getOperand(6), Chain };
@ -1935,7 +1927,6 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
case MVT::v4i32:
Opc = ARM::VST4q32a; Opc2 = ARM::VST4q32b; RegVT = MVT::v2i32; break;
}
SDValue Chain = N->getOperand(0);
// Enable writeback to the address register.
MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32);
@ -1975,6 +1966,8 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
SDValue MemAddr, MemUpdate, MemOpc;
if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
return NULL;
SDValue Chain = N->getOperand(0);
unsigned Lane = cast<ConstantSDNode>(N->getOperand(5))->getZExtValue();
VT = N->getOperand(3).getValueType();
if (VT.is64BitVector()) {
switch (VT.getSimpleVT().SimpleTy) {
@ -1984,10 +1977,9 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
case MVT::v2f32:
case MVT::v2i32: Opc = ARM::VST2LNd32; break;
}
SDValue Chain = N->getOperand(0);
const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
N->getOperand(3), N->getOperand(4),
N->getOperand(5), Chain };
getI32Imm(Lane), Chain };
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7);
}
// Quad registers are handled by extracting subregs and then doing
@ -2012,8 +2004,6 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
RegVT = MVT::v2i32;
break;
}
SDValue Chain = N->getOperand(0);
unsigned Lane = cast<ConstantSDNode>(N->getOperand(5))->getZExtValue();
unsigned NumElts = RegVT.getVectorNumElements();
int SubregIdx = (Lane < NumElts) ? ARM::DSUBREG_0 : ARM::DSUBREG_1;
@ -2031,6 +2021,8 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
SDValue MemAddr, MemUpdate, MemOpc;
if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
return NULL;
SDValue Chain = N->getOperand(0);
unsigned Lane = cast<ConstantSDNode>(N->getOperand(6))->getZExtValue();
VT = N->getOperand(3).getValueType();
if (VT.is64BitVector()) {
switch (VT.getSimpleVT().SimpleTy) {
@ -2040,10 +2032,9 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
case MVT::v2f32:
case MVT::v2i32: Opc = ARM::VST3LNd32; break;
}
SDValue Chain = N->getOperand(0);
const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
N->getOperand(3), N->getOperand(4),
N->getOperand(5), N->getOperand(6), Chain };
N->getOperand(5), getI32Imm(Lane), Chain };
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 8);
}
// Quad registers are handled by extracting subregs and then doing
@ -2068,8 +2059,6 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
RegVT = MVT::v2i32;
break;
}
SDValue Chain = N->getOperand(0);
unsigned Lane = cast<ConstantSDNode>(N->getOperand(6))->getZExtValue();
unsigned NumElts = RegVT.getVectorNumElements();
int SubregIdx = (Lane < NumElts) ? ARM::DSUBREG_0 : ARM::DSUBREG_1;
@ -2089,6 +2078,8 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
SDValue MemAddr, MemUpdate, MemOpc;
if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
return NULL;
SDValue Chain = N->getOperand(0);
unsigned Lane = cast<ConstantSDNode>(N->getOperand(7))->getZExtValue();
VT = N->getOperand(3).getValueType();
if (VT.is64BitVector()) {
switch (VT.getSimpleVT().SimpleTy) {
@ -2098,11 +2089,10 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
case MVT::v2f32:
case MVT::v2i32: Opc = ARM::VST4LNd32; break;
}
SDValue Chain = N->getOperand(0);
const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
N->getOperand(3), N->getOperand(4),
N->getOperand(5), N->getOperand(6),
N->getOperand(7), Chain };
getI32Imm(Lane), Chain };
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 9);
}
// Quad registers are handled by extracting subregs and then doing
@ -2127,8 +2117,6 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
RegVT = MVT::v2i32;
break;
}
SDValue Chain = N->getOperand(0);
unsigned Lane = cast<ConstantSDNode>(N->getOperand(7))->getZExtValue();
unsigned NumElts = RegVT.getVectorNumElements();
int SubregIdx = (Lane < NumElts) ? ARM::DSUBREG_0 : ARM::DSUBREG_1;

View File

@ -392,8 +392,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setOperationAction(ISD::SETCC, MVT::i32, Expand);
setOperationAction(ISD::SETCC, MVT::f32, Expand);
@ -1369,56 +1367,6 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
static SDValue LowerNeonVLDLaneIntrinsic(SDValue Op, SelectionDAG &DAG,
unsigned NumVecs) {
// Change the lane number operand to be a TargetConstant; otherwise it
// will be legalized into a register.
SDNode *Node = Op.getNode();
ConstantSDNode *Lane = dyn_cast<ConstantSDNode>(Node->getOperand(NumVecs+3));
if (!Lane) {
assert(false && "vld lane number must be a constant");
return SDValue();
}
SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
Ops[NumVecs+3] = DAG.getTargetConstant(Lane->getZExtValue(), MVT::i32);
return DAG.UpdateNodeOperands(Op, &Ops[0], Ops.size());
}
static SDValue LowerNeonVSTLaneIntrinsic(SDValue Op, SelectionDAG &DAG,
unsigned NumVecs) {
// Change the lane number operand to be a TargetConstant; otherwise it
// will be legalized into a register.
SDNode *Node = Op.getNode();
ConstantSDNode *Lane = dyn_cast<ConstantSDNode>(Node->getOperand(NumVecs+3));
if (!Lane) {
assert(false && "vst lane number must be a constant");
return SDValue();
}
SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
Ops[NumVecs+3] = DAG.getTargetConstant(Lane->getZExtValue(), MVT::i32);
return DAG.UpdateNodeOperands(Op, &Ops[0], Ops.size());
}
SDValue
ARMTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
switch (IntNo) {
case Intrinsic::arm_neon_vld2lane:
return LowerNeonVLDLaneIntrinsic(Op, DAG, 2);
case Intrinsic::arm_neon_vld3lane:
return LowerNeonVLDLaneIntrinsic(Op, DAG, 3);
case Intrinsic::arm_neon_vld4lane:
return LowerNeonVLDLaneIntrinsic(Op, DAG, 4);
case Intrinsic::arm_neon_vst2lane:
return LowerNeonVSTLaneIntrinsic(Op, DAG, 2);
case Intrinsic::arm_neon_vst3lane:
return LowerNeonVSTLaneIntrinsic(Op, DAG, 3);
case Intrinsic::arm_neon_vst4lane:
return LowerNeonVSTLaneIntrinsic(Op, DAG, 4);
default: return SDValue(); // Don't custom lower most intrinsics.
}
}
SDValue
ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@ -2802,8 +2750,6 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
case ISD::RETURNADDR: break;
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::BIT_CONVERT: return ExpandBIT_CONVERT(Op.getNode(), DAG);
case ISD::SHL: