mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
Build arbitrary vector with more than 2 distinct scalar elements with a
series of unpack and interleave ops. llvm-svn: 27119
This commit is contained in:
parent
d2823658b4
commit
e5807f6b47
@ -2376,7 +2376,9 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
|
||||
abort();
|
||||
}
|
||||
case ISD::BUILD_VECTOR: {
|
||||
std::set<SDOperand> Values;
|
||||
SDOperand Elt0 = Op.getOperand(0);
|
||||
Values.insert(Elt0);
|
||||
bool Elt0IsZero = (isa<ConstantSDNode>(Elt0) &&
|
||||
cast<ConstantSDNode>(Elt0)->getValue() == 0) ||
|
||||
(isa<ConstantFPSDNode>(Elt0) &&
|
||||
@ -2384,15 +2386,16 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
|
||||
bool RestAreZero = true;
|
||||
unsigned NumElems = Op.getNumOperands();
|
||||
for (unsigned i = 1; i < NumElems; ++i) {
|
||||
SDOperand V = Op.getOperand(i);
|
||||
if (ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(V)) {
|
||||
SDOperand Elt = Op.getOperand(i);
|
||||
if (ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Elt)) {
|
||||
if (!FPC->isExactlyValue(+0.0))
|
||||
RestAreZero = false;
|
||||
} else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(V)) {
|
||||
} else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
|
||||
if (!C->isNullValue())
|
||||
RestAreZero = false;
|
||||
} else
|
||||
RestAreZero = false;
|
||||
Values.insert(Elt);
|
||||
}
|
||||
|
||||
if (RestAreZero) {
|
||||
@ -2402,6 +2405,25 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
|
||||
return DAG.getNode(X86ISD::ZEXT_S2VEC, Op.getValueType(), Elt0);
|
||||
}
|
||||
|
||||
if (Values.size() > 2) {
|
||||
// Expand into a number of unpckl*.
|
||||
// e.g. for v4f32
|
||||
// Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
|
||||
// : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
|
||||
// Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
|
||||
MVT::ValueType VT = Op.getValueType();
|
||||
std::vector<SDOperand> V(NumElems);
|
||||
for (unsigned i = 0; i < NumElems; ++i)
|
||||
V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
|
||||
NumElems >>= 1;
|
||||
while (NumElems != 0) {
|
||||
for (unsigned i = 0; i < NumElems; ++i)
|
||||
V[i] = DAG.getNode(X86ISD::UNPCKL, VT, V[i], V[i + NumElems]);
|
||||
NumElems >>= 1;
|
||||
}
|
||||
return V[0];
|
||||
}
|
||||
|
||||
return SDOperand();
|
||||
}
|
||||
}
|
||||
@ -2439,6 +2461,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
case X86ISD::Wrapper: return "X86ISD::Wrapper";
|
||||
case X86ISD::S2VEC: return "X86ISD::S2VEC";
|
||||
case X86ISD::ZEXT_S2VEC: return "X86ISD::ZEXT_S2VEC";
|
||||
case X86ISD::UNPCKL: return "X86ISD::UNPCKL";
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -153,6 +153,10 @@ namespace llvm {
|
||||
/// ZEXT_S2VEC - SCALAR_TO_VECTOR with zero extension. The destination base
|
||||
/// does not have to match the operand type.
|
||||
ZEXT_S2VEC,
|
||||
|
||||
/// UNPCKL - Unpack and interleave low. This corresponds to X86::UNPCKLPS,
|
||||
/// X86::PUNPCKL*.
|
||||
UNPCKL,
|
||||
};
|
||||
|
||||
// X86 specific condition code. These correspond to X86_*_COND in
|
||||
|
@ -28,6 +28,11 @@ def X86s2vec : SDNode<"X86ISD::S2VEC",
|
||||
def X86zexts2vec : SDNode<"X86ISD::ZEXT_S2VEC",
|
||||
SDTypeProfile<1, 1, []>, []>;
|
||||
|
||||
def SDTUnpckl : SDTypeProfile<1, 2,
|
||||
[SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
|
||||
def X86unpckl : SDNode<"X86ISD::UNPCKL", SDTUnpckl,
|
||||
[]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SSE pattern fragments
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -787,10 +792,14 @@ def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
|
||||
"unpckhpd {$src2, $dst|$dst, $src2}", []>;
|
||||
def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
|
||||
(ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||
"unpcklps {$src2, $dst|$dst, $src2}", []>;
|
||||
"unpcklps {$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst, (v4f32 (X86unpckl VR128:$src1,
|
||||
VR128:$src2)))]>;
|
||||
def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
|
||||
(ops VR128:$dst, VR128:$src1, f128mem:$src2),
|
||||
"unpcklps {$src2, $dst|$dst, $src2}", []>;
|
||||
"unpcklps {$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst, (v4f32 (X86unpckl VR128:$src1,
|
||||
(load addr:$src2))))]>;
|
||||
def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
|
||||
(ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||
"unpcklpd {$src2, $dst|$dst, $src2}", []>;
|
||||
@ -885,6 +894,69 @@ def PSUBDrm : PDI<0xFA, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
|
||||
"psubd {$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst, (v4i32 (sub VR128:$src1,
|
||||
(load addr:$src2))))]>;
|
||||
|
||||
// Unpack and interleave
|
||||
def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
|
||||
(ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||
"punpcklbw {$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst, (v16i8 (X86unpckl VR128:$src1,
|
||||
VR128:$src2)))]>;
|
||||
def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
|
||||
(ops VR128:$dst, VR128:$src1, i128mem:$src2),
|
||||
"punpcklbw {$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst, (v16i8 (X86unpckl VR128:$src1,
|
||||
(load addr:$src2))))]>;
|
||||
def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
|
||||
(ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||
"punpcklwd {$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst, (v8i16 (X86unpckl VR128:$src1,
|
||||
VR128:$src2)))]>;
|
||||
def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
|
||||
(ops VR128:$dst, VR128:$src1, i128mem:$src2),
|
||||
"punpcklwd {$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst, (v8i16 (X86unpckl VR128:$src1,
|
||||
(load addr:$src2))))]>;
|
||||
def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
|
||||
(ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||
"punpckldq {$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst, (v4i32 (X86unpckl VR128:$src1,
|
||||
VR128:$src2)))]>;
|
||||
def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
|
||||
(ops VR128:$dst, VR128:$src1, i128mem:$src2),
|
||||
"punpckldq {$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst, (v4i32 (X86unpckl VR128:$src1,
|
||||
(load addr:$src2))))]>;
|
||||
def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
|
||||
(ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||
"punpcklqdq {$src2, $dst|$dst, $src2}", []>;
|
||||
def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
|
||||
(ops VR128:$dst, VR128:$src1, i128mem:$src2),
|
||||
"punpcklqdq {$src2, $dst|$dst, $src2}", []>;
|
||||
|
||||
def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
|
||||
(ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||
"punpckhbw {$src2, $dst|$dst, $src2}", []>;
|
||||
def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
|
||||
(ops VR128:$dst, VR128:$src1, i128mem:$src2),
|
||||
"punpckhbw {$src2, $dst|$dst, $src2}", []>;
|
||||
def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
|
||||
(ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||
"punpckhwd {$src2, $dst|$dst, $src2}", []>;
|
||||
def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
|
||||
(ops VR128:$dst, VR128:$src1, i128mem:$src2),
|
||||
"punpckhwd {$src2, $dst|$dst, $src2}", []>;
|
||||
def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
|
||||
(ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||
"punpckhdq {$src2, $dst|$dst, $src2}", []>;
|
||||
def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
|
||||
(ops VR128:$dst, VR128:$src1, i128mem:$src2),
|
||||
"punpckhdq {$src2, $dst|$dst, $src2}", []>;
|
||||
// PUNPCKHQDQ (register form), opcode 0x6D: unpack and interleave the high
// quadwords of $src1 and $src2. The assembly string must spell the mnemonic
// "punpckhqdq", matching the memory form PUNPCKHQDQrm; "punpckhdq" is the
// separate doubleword instruction at opcode 0x6A. No selection pattern is
// attached to this definition.
def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
                       (ops VR128:$dst, VR128:$src1, VR128:$src2),
                       "punpckhqdq {$src2, $dst|$dst, $src2}", []>;
|
||||
def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
|
||||
(ops VR128:$dst, VR128:$src1, i128mem:$src2),
|
||||
"punpckhqdq {$src2, $dst|$dst, $src2}", []>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
Loading…
Reference in New Issue
Block a user