mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 11:42:57 +01:00
More refactoring. NEON vst lane intrinsics can share almost all the code for
vld lane intrinsics.

llvm-svn: 84110
This commit is contained in:
parent
7623a1ce5c
commit
5dbe1c0143
@ -126,12 +126,12 @@ private:
|
|||||||
/// SelectDYN_ALLOC - Select dynamic alloc for Thumb.
|
/// SelectDYN_ALLOC - Select dynamic alloc for Thumb.
|
||||||
SDNode *SelectDYN_ALLOC(SDValue Op);
|
SDNode *SelectDYN_ALLOC(SDValue Op);
|
||||||
|
|
||||||
/// SelectVLDLane - Select NEON load structure to one lane. NumVecs should
|
/// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
|
||||||
/// be 2, 3 or 4. The opcode arrays specify the instructions used for
|
/// be 2, 3 or 4. The opcode arrays specify the instructions used for
|
||||||
/// loading D registers and even subregs and odd subregs of Q registers.
|
/// load/store of D registers and even subregs and odd subregs of Q registers.
|
||||||
SDNode *SelectVLDLane(SDValue Op, unsigned NumVecs,
|
SDNode *SelectVLDSTLane(SDValue Op, bool IsLoad, unsigned NumVecs,
|
||||||
unsigned *DOpcodes, unsigned *QOpcodes0,
|
unsigned *DOpcodes, unsigned *QOpcodes0,
|
||||||
unsigned *QOpcodes1);
|
unsigned *QOpcodes1);
|
||||||
|
|
||||||
/// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
|
/// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
|
||||||
SDNode *SelectV6T2BitfieldExtractOp(SDValue Op, unsigned Opc);
|
SDNode *SelectV6T2BitfieldExtractOp(SDValue Op, unsigned Opc);
|
||||||
@ -990,10 +990,11 @@ static EVT GetNEONSubregVT(EVT VT) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
SDNode *ARMDAGToDAGISel::SelectVLDLane(SDValue Op, unsigned NumVecs,
|
SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad,
|
||||||
unsigned *DOpcodes, unsigned *QOpcodes0,
|
unsigned NumVecs, unsigned *DOpcodes,
|
||||||
unsigned *QOpcodes1) {
|
unsigned *QOpcodes0,
|
||||||
assert(NumVecs >=2 && NumVecs <= 4 && "VLDLane NumVecs out-of-range");
|
unsigned *QOpcodes1) {
|
||||||
|
assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
|
||||||
SDNode *N = Op.getNode();
|
SDNode *N = Op.getNode();
|
||||||
DebugLoc dl = N->getDebugLoc();
|
DebugLoc dl = N->getDebugLoc();
|
||||||
|
|
||||||
@ -1004,11 +1005,10 @@ SDNode *ARMDAGToDAGISel::SelectVLDLane(SDValue Op, unsigned NumVecs,
|
|||||||
SDValue Chain = N->getOperand(0);
|
SDValue Chain = N->getOperand(0);
|
||||||
unsigned Lane =
|
unsigned Lane =
|
||||||
cast<ConstantSDNode>(N->getOperand(NumVecs+3))->getZExtValue();
|
cast<ConstantSDNode>(N->getOperand(NumVecs+3))->getZExtValue();
|
||||||
EVT VT = N->getValueType(0);
|
EVT VT = IsLoad ? N->getValueType(0) : N->getOperand(3).getValueType();
|
||||||
bool is64BitVector = VT.is64BitVector();
|
bool is64BitVector = VT.is64BitVector();
|
||||||
|
|
||||||
// Quad registers are handled by extracting subregs, doing the load,
|
// Quad registers are handled by load/store of subregs. Find the subreg info.
|
||||||
// and then inserting the results as subregs. Find the subreg info.
|
|
||||||
unsigned NumElts = 0;
|
unsigned NumElts = 0;
|
||||||
int SubregIdx = 0;
|
int SubregIdx = 0;
|
||||||
EVT RegVT = VT;
|
EVT RegVT = VT;
|
||||||
@ -1020,7 +1020,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDLane(SDValue Op, unsigned NumVecs,
|
|||||||
|
|
||||||
unsigned OpcodeIndex;
|
unsigned OpcodeIndex;
|
||||||
switch (VT.getSimpleVT().SimpleTy) {
|
switch (VT.getSimpleVT().SimpleTy) {
|
||||||
default: llvm_unreachable("unhandled vld lane type");
|
default: llvm_unreachable("unhandled vld/vst lane type");
|
||||||
// Double-register operations:
|
// Double-register operations:
|
||||||
case MVT::v8i8: OpcodeIndex = 0; break;
|
case MVT::v8i8: OpcodeIndex = 0; break;
|
||||||
case MVT::v4i16: OpcodeIndex = 1; break;
|
case MVT::v4i16: OpcodeIndex = 1; break;
|
||||||
@ -1058,6 +1058,9 @@ SDNode *ARMDAGToDAGISel::SelectVLDLane(SDValue Op, unsigned NumVecs,
|
|||||||
Ops.push_back(getI32Imm(Lane));
|
Ops.push_back(getI32Imm(Lane));
|
||||||
Ops.push_back(Chain);
|
Ops.push_back(Chain);
|
||||||
|
|
||||||
|
if (!IsLoad)
|
||||||
|
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5);
|
||||||
|
|
||||||
std::vector<EVT> ResTys(NumVecs, RegVT);
|
std::vector<EVT> ResTys(NumVecs, RegVT);
|
||||||
ResTys.push_back(MVT::Other);
|
ResTys.push_back(MVT::Other);
|
||||||
SDNode *VLdLn =
|
SDNode *VLdLn =
|
||||||
@ -1084,7 +1087,7 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDValue Op,
|
|||||||
unsigned Opc) {
|
unsigned Opc) {
|
||||||
if (!Subtarget->hasV6T2Ops())
|
if (!Subtarget->hasV6T2Ops())
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
unsigned Shl_imm = 0;
|
unsigned Shl_imm = 0;
|
||||||
if (isOpcWithIntImmediate(Op.getOperand(0).getNode(), ISD::SHL, Shl_imm)){
|
if (isOpcWithIntImmediate(Op.getOperand(0).getNode(), ISD::SHL, Shl_imm)){
|
||||||
assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
|
assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
|
||||||
@ -1681,21 +1684,21 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
|
|||||||
unsigned DOpcodes[] = { ARM::VLD2LNd8, ARM::VLD2LNd16, ARM::VLD2LNd32 };
|
unsigned DOpcodes[] = { ARM::VLD2LNd8, ARM::VLD2LNd16, ARM::VLD2LNd32 };
|
||||||
unsigned QOpcodes0[] = { ARM::VLD2LNq16a, ARM::VLD2LNq32a };
|
unsigned QOpcodes0[] = { ARM::VLD2LNq16a, ARM::VLD2LNq32a };
|
||||||
unsigned QOpcodes1[] = { ARM::VLD2LNq16b, ARM::VLD2LNq32b };
|
unsigned QOpcodes1[] = { ARM::VLD2LNq16b, ARM::VLD2LNq32b };
|
||||||
return SelectVLDLane(Op, 2, DOpcodes, QOpcodes0, QOpcodes1);
|
return SelectVLDSTLane(Op, true, 2, DOpcodes, QOpcodes0, QOpcodes1);
|
||||||
}
|
}
|
||||||
|
|
||||||
case Intrinsic::arm_neon_vld3lane: {
|
case Intrinsic::arm_neon_vld3lane: {
|
||||||
unsigned DOpcodes[] = { ARM::VLD3LNd8, ARM::VLD3LNd16, ARM::VLD3LNd32 };
|
unsigned DOpcodes[] = { ARM::VLD3LNd8, ARM::VLD3LNd16, ARM::VLD3LNd32 };
|
||||||
unsigned QOpcodes0[] = { ARM::VLD3LNq16a, ARM::VLD3LNq32a };
|
unsigned QOpcodes0[] = { ARM::VLD3LNq16a, ARM::VLD3LNq32a };
|
||||||
unsigned QOpcodes1[] = { ARM::VLD3LNq16b, ARM::VLD3LNq32b };
|
unsigned QOpcodes1[] = { ARM::VLD3LNq16b, ARM::VLD3LNq32b };
|
||||||
return SelectVLDLane(Op, 3, DOpcodes, QOpcodes0, QOpcodes1);
|
return SelectVLDSTLane(Op, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
|
||||||
}
|
}
|
||||||
|
|
||||||
case Intrinsic::arm_neon_vld4lane: {
|
case Intrinsic::arm_neon_vld4lane: {
|
||||||
unsigned DOpcodes[] = { ARM::VLD4LNd8, ARM::VLD4LNd16, ARM::VLD4LNd32 };
|
unsigned DOpcodes[] = { ARM::VLD4LNd8, ARM::VLD4LNd16, ARM::VLD4LNd32 };
|
||||||
unsigned QOpcodes0[] = { ARM::VLD4LNq16a, ARM::VLD4LNq32a };
|
unsigned QOpcodes0[] = { ARM::VLD4LNq16a, ARM::VLD4LNq32a };
|
||||||
unsigned QOpcodes1[] = { ARM::VLD4LNq16b, ARM::VLD4LNq32b };
|
unsigned QOpcodes1[] = { ARM::VLD4LNq16b, ARM::VLD4LNq32b };
|
||||||
return SelectVLDLane(Op, 4, DOpcodes, QOpcodes0, QOpcodes1);
|
return SelectVLDSTLane(Op, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
|
||||||
}
|
}
|
||||||
|
|
||||||
case Intrinsic::arm_neon_vst2: {
|
case Intrinsic::arm_neon_vst2: {
|
||||||
@ -1874,175 +1877,24 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
case Intrinsic::arm_neon_vst2lane: {
|
case Intrinsic::arm_neon_vst2lane: {
|
||||||
SDValue MemAddr, MemUpdate, MemOpc;
|
unsigned DOpcodes[] = { ARM::VST2LNd8, ARM::VST2LNd16, ARM::VST2LNd32 };
|
||||||
if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
|
unsigned QOpcodes0[] = { ARM::VST2LNq16a, ARM::VST2LNq32a };
|
||||||
return NULL;
|
unsigned QOpcodes1[] = { ARM::VST2LNq16b, ARM::VST2LNq32b };
|
||||||
SDValue Chain = N->getOperand(0);
|
return SelectVLDSTLane(Op, false, 2, DOpcodes, QOpcodes0, QOpcodes1);
|
||||||
unsigned Lane = cast<ConstantSDNode>(N->getOperand(5))->getZExtValue();
|
|
||||||
VT = N->getOperand(3).getValueType();
|
|
||||||
if (VT.is64BitVector()) {
|
|
||||||
switch (VT.getSimpleVT().SimpleTy) {
|
|
||||||
default: llvm_unreachable("unhandled vst2lane type");
|
|
||||||
case MVT::v8i8: Opc = ARM::VST2LNd8; break;
|
|
||||||
case MVT::v4i16: Opc = ARM::VST2LNd16; break;
|
|
||||||
case MVT::v2f32:
|
|
||||||
case MVT::v2i32: Opc = ARM::VST2LNd32; break;
|
|
||||||
}
|
|
||||||
const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
|
|
||||||
N->getOperand(3), N->getOperand(4),
|
|
||||||
getI32Imm(Lane), Chain };
|
|
||||||
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7);
|
|
||||||
}
|
|
||||||
// Quad registers are handled by extracting subregs and then doing
|
|
||||||
// the store.
|
|
||||||
EVT RegVT;
|
|
||||||
unsigned Opc2 = 0;
|
|
||||||
switch (VT.getSimpleVT().SimpleTy) {
|
|
||||||
default: llvm_unreachable("unhandled vst2lane type");
|
|
||||||
case MVT::v8i16:
|
|
||||||
Opc = ARM::VST2LNq16a;
|
|
||||||
Opc2 = ARM::VST2LNq16b;
|
|
||||||
RegVT = MVT::v4i16;
|
|
||||||
break;
|
|
||||||
case MVT::v4f32:
|
|
||||||
Opc = ARM::VST2LNq32a;
|
|
||||||
Opc2 = ARM::VST2LNq32b;
|
|
||||||
RegVT = MVT::v2f32;
|
|
||||||
break;
|
|
||||||
case MVT::v4i32:
|
|
||||||
Opc = ARM::VST2LNq32a;
|
|
||||||
Opc2 = ARM::VST2LNq32b;
|
|
||||||
RegVT = MVT::v2i32;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
unsigned NumElts = RegVT.getVectorNumElements();
|
|
||||||
int SubregIdx = (Lane < NumElts) ? ARM::DSUBREG_0 : ARM::DSUBREG_1;
|
|
||||||
|
|
||||||
SDValue D0 = CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT,
|
|
||||||
N->getOperand(3));
|
|
||||||
SDValue D1 = CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT,
|
|
||||||
N->getOperand(4));
|
|
||||||
const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, D0, D1,
|
|
||||||
getI32Imm(Lane % NumElts), Chain };
|
|
||||||
return CurDAG->getMachineNode((Lane < NumElts) ? Opc : Opc2,
|
|
||||||
dl, MVT::Other, Ops, 7);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
case Intrinsic::arm_neon_vst3lane: {
|
case Intrinsic::arm_neon_vst3lane: {
|
||||||
SDValue MemAddr, MemUpdate, MemOpc;
|
unsigned DOpcodes[] = { ARM::VST3LNd8, ARM::VST3LNd16, ARM::VST3LNd32 };
|
||||||
if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
|
unsigned QOpcodes0[] = { ARM::VST3LNq16a, ARM::VST3LNq32a };
|
||||||
return NULL;
|
unsigned QOpcodes1[] = { ARM::VST3LNq16b, ARM::VST3LNq32b };
|
||||||
SDValue Chain = N->getOperand(0);
|
return SelectVLDSTLane(Op, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
|
||||||
unsigned Lane = cast<ConstantSDNode>(N->getOperand(6))->getZExtValue();
|
|
||||||
VT = N->getOperand(3).getValueType();
|
|
||||||
if (VT.is64BitVector()) {
|
|
||||||
switch (VT.getSimpleVT().SimpleTy) {
|
|
||||||
default: llvm_unreachable("unhandled vst3lane type");
|
|
||||||
case MVT::v8i8: Opc = ARM::VST3LNd8; break;
|
|
||||||
case MVT::v4i16: Opc = ARM::VST3LNd16; break;
|
|
||||||
case MVT::v2f32:
|
|
||||||
case MVT::v2i32: Opc = ARM::VST3LNd32; break;
|
|
||||||
}
|
|
||||||
const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
|
|
||||||
N->getOperand(3), N->getOperand(4),
|
|
||||||
N->getOperand(5), getI32Imm(Lane), Chain };
|
|
||||||
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 8);
|
|
||||||
}
|
|
||||||
// Quad registers are handled by extracting subregs and then doing
|
|
||||||
// the store.
|
|
||||||
EVT RegVT;
|
|
||||||
unsigned Opc2 = 0;
|
|
||||||
switch (VT.getSimpleVT().SimpleTy) {
|
|
||||||
default: llvm_unreachable("unhandled vst3lane type");
|
|
||||||
case MVT::v8i16:
|
|
||||||
Opc = ARM::VST3LNq16a;
|
|
||||||
Opc2 = ARM::VST3LNq16b;
|
|
||||||
RegVT = MVT::v4i16;
|
|
||||||
break;
|
|
||||||
case MVT::v4f32:
|
|
||||||
Opc = ARM::VST3LNq32a;
|
|
||||||
Opc2 = ARM::VST3LNq32b;
|
|
||||||
RegVT = MVT::v2f32;
|
|
||||||
break;
|
|
||||||
case MVT::v4i32:
|
|
||||||
Opc = ARM::VST3LNq32a;
|
|
||||||
Opc2 = ARM::VST3LNq32b;
|
|
||||||
RegVT = MVT::v2i32;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
unsigned NumElts = RegVT.getVectorNumElements();
|
|
||||||
int SubregIdx = (Lane < NumElts) ? ARM::DSUBREG_0 : ARM::DSUBREG_1;
|
|
||||||
|
|
||||||
SDValue D0 = CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT,
|
|
||||||
N->getOperand(3));
|
|
||||||
SDValue D1 = CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT,
|
|
||||||
N->getOperand(4));
|
|
||||||
SDValue D2 = CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT,
|
|
||||||
N->getOperand(5));
|
|
||||||
const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, D0, D1, D2,
|
|
||||||
getI32Imm(Lane % NumElts), Chain };
|
|
||||||
return CurDAG->getMachineNode((Lane < NumElts) ? Opc : Opc2,
|
|
||||||
dl, MVT::Other, Ops, 8);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
case Intrinsic::arm_neon_vst4lane: {
|
case Intrinsic::arm_neon_vst4lane: {
|
||||||
SDValue MemAddr, MemUpdate, MemOpc;
|
unsigned DOpcodes[] = { ARM::VST4LNd8, ARM::VST4LNd16, ARM::VST4LNd32 };
|
||||||
if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
|
unsigned QOpcodes0[] = { ARM::VST4LNq16a, ARM::VST4LNq32a };
|
||||||
return NULL;
|
unsigned QOpcodes1[] = { ARM::VST4LNq16b, ARM::VST4LNq32b };
|
||||||
SDValue Chain = N->getOperand(0);
|
return SelectVLDSTLane(Op, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
|
||||||
unsigned Lane = cast<ConstantSDNode>(N->getOperand(7))->getZExtValue();
|
|
||||||
VT = N->getOperand(3).getValueType();
|
|
||||||
if (VT.is64BitVector()) {
|
|
||||||
switch (VT.getSimpleVT().SimpleTy) {
|
|
||||||
default: llvm_unreachable("unhandled vst4lane type");
|
|
||||||
case MVT::v8i8: Opc = ARM::VST4LNd8; break;
|
|
||||||
case MVT::v4i16: Opc = ARM::VST4LNd16; break;
|
|
||||||
case MVT::v2f32:
|
|
||||||
case MVT::v2i32: Opc = ARM::VST4LNd32; break;
|
|
||||||
}
|
|
||||||
const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
|
|
||||||
N->getOperand(3), N->getOperand(4),
|
|
||||||
N->getOperand(5), N->getOperand(6),
|
|
||||||
getI32Imm(Lane), Chain };
|
|
||||||
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 9);
|
|
||||||
}
|
|
||||||
// Quad registers are handled by extracting subregs and then doing
|
|
||||||
// the store.
|
|
||||||
EVT RegVT;
|
|
||||||
unsigned Opc2 = 0;
|
|
||||||
switch (VT.getSimpleVT().SimpleTy) {
|
|
||||||
default: llvm_unreachable("unhandled vst4lane type");
|
|
||||||
case MVT::v8i16:
|
|
||||||
Opc = ARM::VST4LNq16a;
|
|
||||||
Opc2 = ARM::VST4LNq16b;
|
|
||||||
RegVT = MVT::v4i16;
|
|
||||||
break;
|
|
||||||
case MVT::v4f32:
|
|
||||||
Opc = ARM::VST4LNq32a;
|
|
||||||
Opc2 = ARM::VST4LNq32b;
|
|
||||||
RegVT = MVT::v2f32;
|
|
||||||
break;
|
|
||||||
case MVT::v4i32:
|
|
||||||
Opc = ARM::VST4LNq32a;
|
|
||||||
Opc2 = ARM::VST4LNq32b;
|
|
||||||
RegVT = MVT::v2i32;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
unsigned NumElts = RegVT.getVectorNumElements();
|
|
||||||
int SubregIdx = (Lane < NumElts) ? ARM::DSUBREG_0 : ARM::DSUBREG_1;
|
|
||||||
|
|
||||||
SDValue D0 = CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT,
|
|
||||||
N->getOperand(3));
|
|
||||||
SDValue D1 = CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT,
|
|
||||||
N->getOperand(4));
|
|
||||||
SDValue D2 = CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT,
|
|
||||||
N->getOperand(5));
|
|
||||||
SDValue D3 = CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT,
|
|
||||||
N->getOperand(6));
|
|
||||||
const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, D0, D1, D2, D3,
|
|
||||||
getI32Imm(Lane % NumElts), Chain };
|
|
||||||
return CurDAG->getMachineNode((Lane < NumElts) ? Opc : Opc2,
|
|
||||||
dl, MVT::Other, Ops, 9);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user