From e5807f6b4707c47c8aac088a0f2a18ced13eceb4 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Sat, 25 Mar 2006 09:37:23 +0000 Subject: [PATCH] Build arbitrary vector with more than 2 distinct scalar elements with a series of unpack and interleave ops. llvm-svn: 27119 --- lib/Target/X86/X86ISelLowering.cpp | 29 ++++++++++-- lib/Target/X86/X86ISelLowering.h | 4 ++ lib/Target/X86/X86InstrSSE.td | 76 +++++++++++++++++++++++++++++- 3 files changed, 104 insertions(+), 5 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 01951e636ae..823d0709d3c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2376,7 +2376,9 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { abort(); } case ISD::BUILD_VECTOR: { + std::set Values; SDOperand Elt0 = Op.getOperand(0); + Values.insert(Elt0); bool Elt0IsZero = (isa(Elt0) && cast(Elt0)->getValue() == 0) || (isa(Elt0) && @@ -2384,15 +2386,16 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { bool RestAreZero = true; unsigned NumElems = Op.getNumOperands(); for (unsigned i = 1; i < NumElems; ++i) { - SDOperand V = Op.getOperand(i); - if (ConstantFPSDNode *FPC = dyn_cast(V)) { + SDOperand Elt = Op.getOperand(i); + if (ConstantFPSDNode *FPC = dyn_cast(Elt)) { if (!FPC->isExactlyValue(+0.0)) RestAreZero = false; - } else if (ConstantSDNode *C = dyn_cast(V)) { + } else if (ConstantSDNode *C = dyn_cast(Elt)) { if (!C->isNullValue()) RestAreZero = false; } else RestAreZero = false; + Values.insert(Elt); } if (RestAreZero) { @@ -2402,6 +2405,25 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { return DAG.getNode(X86ISD::ZEXT_S2VEC, Op.getValueType(), Elt0); } + if (Values.size() > 2) { + // Expand into a number of unpckl*. + // e.g. for v4f32 + // Step 1: unpcklps 0, 2 ==> X: + // : unpcklps 1, 3 ==> Y: + // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> + MVT::ValueType VT = Op.getValueType(); + std::vector V(NumElems); + for (unsigned i = 0; i < NumElems; ++i) + V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); + NumElems >>= 1; + while (NumElems != 0) { + for (unsigned i = 0; i < NumElems; ++i) + V[i] = DAG.getNode(X86ISD::UNPCKL, VT, V[i], V[i + NumElems]); + NumElems >>= 1; + } + return V[0]; + } + return SDOperand(); } } @@ -2439,6 +2461,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::Wrapper: return "X86ISD::Wrapper"; case X86ISD::S2VEC: return "X86ISD::S2VEC"; case X86ISD::ZEXT_S2VEC: return "X86ISD::ZEXT_S2VEC"; + case X86ISD::UNPCKL: return "X86ISD::UNPCKL"; } } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 1dc90e536e1..71d7751e48d 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -153,6 +153,10 @@ namespace llvm { /// ZEXT_S2VEC - SCALAR_TO_VECTOR with zero extension. The destination base /// does not have to match the operand type. ZEXT_S2VEC, + + /// UNPCKL - Unpack and interleave low. This corresponds to X86::UNPCKLPS, + /// X86::PUNPCKL*. + UNPCKL, }; // X86 specific condition code. These correspond to X86_*_COND in diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index bdd43fbc397..a1946aae278 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -28,6 +28,11 @@ def X86s2vec : SDNode<"X86ISD::S2VEC", def X86zexts2vec : SDNode<"X86ISD::ZEXT_S2VEC", SDTypeProfile<1, 1, []>, []>; +def SDTUnpckl : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>; +def X86unpckl : SDNode<"X86ISD::UNPCKL", SDTUnpckl, + []>; + //===----------------------------------------------------------------------===// // SSE pattern fragments //===----------------------------------------------------------------------===// @@ -787,10 +792,14 @@ def UNPCKHPDrm : PDI<0x15, MRMSrcMem, "unpckhpd {$src2, $dst|$dst, $src2}", []>; def UNPCKLPSrr : PSI<0x14, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), - "unpcklps {$src2, $dst|$dst, $src2}", []>; + "unpcklps {$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (v4f32 (X86unpckl VR128:$src1, + VR128:$src2)))]>; def UNPCKLPSrm : PSI<0x14, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2), - "unpcklps {$src2, $dst|$dst, $src2}", []>; + "unpcklps {$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (v4f32 (X86unpckl VR128:$src1, + (load addr:$src2))))]>; def UNPCKLPDrr : PDI<0x14, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), "unpcklpd {$src2, $dst|$dst, $src2}", []>; @@ -885,6 +894,69 @@ def PSUBDrm : PDI<0xFA, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2), "psubd {$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v4i32 (sub VR128:$src1, (load addr:$src2))))]>; + +// Unpack and interleave +def PUNPCKLBWrr : PDI<0x60, MRMSrcReg, + (ops VR128:$dst, VR128:$src1, VR128:$src2), + "punpcklbw {$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (v16i8 (X86unpckl VR128:$src1, + VR128:$src2)))]>; +def PUNPCKLBWrm : PDI<0x60, MRMSrcMem, + (ops VR128:$dst, VR128:$src1, i128mem:$src2), + "punpcklbw {$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (v16i8 (X86unpckl VR128:$src1, + (load addr:$src2))))]>; +def PUNPCKLWDrr : PDI<0x61, MRMSrcReg, + (ops VR128:$dst, VR128:$src1, VR128:$src2), + "punpcklwd {$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (v8i16 (X86unpckl VR128:$src1, + VR128:$src2)))]>; +def PUNPCKLWDrm : PDI<0x61, MRMSrcMem, + (ops VR128:$dst, VR128:$src1, i128mem:$src2), + "punpcklwd {$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (v8i16 (X86unpckl VR128:$src1, + (load addr:$src2))))]>; +def PUNPCKLDQrr : PDI<0x62, MRMSrcReg, + (ops VR128:$dst, VR128:$src1, VR128:$src2), + "punpckldq {$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (v4i32 (X86unpckl VR128:$src1, + VR128:$src2)))]>; +def PUNPCKLDQrm : PDI<0x62, MRMSrcMem, + (ops VR128:$dst, VR128:$src1, i128mem:$src2), + "punpckldq {$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (v4i32 (X86unpckl VR128:$src1, + (load addr:$src2))))]>; +def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, + (ops VR128:$dst, VR128:$src1, VR128:$src2), + "punpcklqdq {$src2, $dst|$dst, $src2}", []>; +def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, + (ops VR128:$dst, VR128:$src1, i128mem:$src2), + "punpcklqdq {$src2, $dst|$dst, $src2}", []>; + +def PUNPCKHBWrr : PDI<0x68, MRMSrcReg, + (ops VR128:$dst, VR128:$src1, VR128:$src2), + "punpckhbw {$src2, $dst|$dst, $src2}", []>; +def PUNPCKHBWrm : PDI<0x68, MRMSrcMem, + (ops VR128:$dst, VR128:$src1, i128mem:$src2), + "punpckhbw {$src2, $dst|$dst, $src2}", []>; +def PUNPCKHWDrr : PDI<0x69, MRMSrcReg, + (ops VR128:$dst, VR128:$src1, VR128:$src2), + "punpckhwd {$src2, $dst|$dst, $src2}", []>; +def PUNPCKHWDrm : PDI<0x69, MRMSrcMem, + (ops VR128:$dst, VR128:$src1, i128mem:$src2), + "punpckhwd {$src2, $dst|$dst, $src2}", []>; +def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg, + (ops VR128:$dst, VR128:$src1, VR128:$src2), + "punpckhdq {$src2, $dst|$dst, $src2}", []>; +def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem, + (ops VR128:$dst, VR128:$src1, i128mem:$src2), + "punpckhdq {$src2, $dst|$dst, $src2}", []>; +def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg, + (ops VR128:$dst, VR128:$src1, VR128:$src2), + "punpckhdq {$src2, $dst|$dst, $src2}", []>; +def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem, + (ops VR128:$dst, VR128:$src1, i128mem:$src2), + "punpckhqdq {$src2, $dst|$dst, $src2}", []>; } //===----------------------------------------------------------------------===//