From 3028b040571fe483d61b5da0fa39c3b9bcd2fa5a Mon Sep 17 00:00:00 2001
From: Evan Cheng
Date: Fri, 24 Mar 2006 02:58:06 +0000
Subject: [PATCH] More efficient v2f64 shuffle using movlhps, movhlps,
 unpckhpd, and unpcklpd.

llvm-svn: 27040
---
 lib/Target/X86/X86ISelLowering.cpp | 74 ++++++++++++++++++++++++++++--
 lib/Target/X86/X86ISelLowering.h   | 13 ++++++
 lib/Target/X86/X86InstrSSE.td      | 53 ++++++++++++++++++---
 3 files changed, 128 insertions(+), 12 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index cd183131f17..4e209e4554e 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1398,8 +1398,19 @@ bool X86::isSHUFPMask(SDNode *N) {
   assert(N->getOpcode() == ISD::BUILD_VECTOR);
 
   unsigned NumOperands = N->getNumOperands();
-  if (NumOperands != 2 && NumOperands != 4)
-    return false;
+  if (NumOperands == 2) {
+    // The only case that ought to be handled by SHUFPD is
+    // Dest { 2, 1 } <= shuffle(Dest { 1, 0 }, Src { 3, 2 })
+    // Expect bit 0 == 1, bit 1 == 2.
+    SDOperand Bit0 = N->getOperand(0);
+    SDOperand Bit1 = N->getOperand(1);
+    assert(isa<ConstantSDNode>(Bit0) && isa<ConstantSDNode>(Bit1) &&
+           "Invalid VECTOR_SHUFFLE mask!");
+    return (cast<ConstantSDNode>(Bit0)->getValue() == 1 &&
+            cast<ConstantSDNode>(Bit1)->getValue() == 2);
+  }
+
+  if (NumOperands != 4) return false;
 
   // Each half must refer to only one of the vectors.
   SDOperand Elt = N->getOperand(0);
@@ -1424,6 +1435,58 @@ bool X86::isSHUFPMask(SDNode *N) {
   return true;
 }
 
+/// isMOVLHPSorUNPCKLPDMask - Return true if the specified VECTOR_SHUFFLE
+/// operand specifies a shuffle of elements that is suitable for input to
+/// MOVLHPS or UNPCKLPD.
+bool X86::isMOVLHPSorUNPCKLPDMask(SDNode *N) {
+  assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+  if (N->getNumOperands() != 2)
+    return false;
+
+  // Expect bit 0 == 0, bit 1 == 2.
+  SDOperand Bit0 = N->getOperand(0);
+  SDOperand Bit1 = N->getOperand(1);
+  assert(isa<ConstantSDNode>(Bit0) && isa<ConstantSDNode>(Bit1) &&
+         "Invalid VECTOR_SHUFFLE mask!");
+  return (cast<ConstantSDNode>(Bit0)->getValue() == 0 &&
+          cast<ConstantSDNode>(Bit1)->getValue() == 2);
+}
+
+/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
+bool X86::isMOVHLPSMask(SDNode *N) {
+  assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+  if (N->getNumOperands() != 2)
+    return false;
+
+  // Expect bit 0 == 0, bit 1 == 3.
+  SDOperand Bit0 = N->getOperand(0);
+  SDOperand Bit1 = N->getOperand(1);
+  assert(isa<ConstantSDNode>(Bit0) && isa<ConstantSDNode>(Bit1) &&
+         "Invalid VECTOR_SHUFFLE mask!");
+  return (cast<ConstantSDNode>(Bit0)->getValue() == 0 &&
+          cast<ConstantSDNode>(Bit1)->getValue() == 3);
+}
+
+/// isUNPCKHPDMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to UNPCKHPD.
+bool X86::isUNPCKHPDMask(SDNode *N) {
+  assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+  if (N->getNumOperands() != 2)
+    return false;
+
+  // Expect bit 0 == 1, bit 1 == 3.
+  SDOperand Bit0 = N->getOperand(0);
+  SDOperand Bit1 = N->getOperand(1);
+  assert(isa<ConstantSDNode>(Bit0) && isa<ConstantSDNode>(Bit1) &&
+         "Invalid VECTOR_SHUFFLE mask!");
+  return (cast<ConstantSDNode>(Bit0)->getValue() == 1 &&
+          cast<ConstantSDNode>(Bit1)->getValue() == 3);
+}
+
 /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
 /// a splat of a single element.
 bool X86::isSplatMask(SDNode *N) {
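For reference, these predicates all check the standard VECTOR_SHUFFLE mask
convention: a two-element mask indexes the concatenation of the two input
vectors, so values 0-1 select from the first operand and 2-3 from the second.
The following standalone C++ sketch models that convention; it is not part of
the patch, and shuffle2 is a hypothetical helper used only for illustration:

  #include <array>
  #include <cassert>

  // Reference model of a two-element VECTOR_SHUFFLE mask: index < 2 selects
  // from V1, index >= 2 selects from V2.
  static std::array<double, 2> shuffle2(const std::array<double, 2> &V1,
                                        const std::array<double, 2> &V2,
                                        unsigned M0, unsigned M1) {
    auto pick = [&](unsigned M) {
      assert(M < 4 && "two-element shuffle masks use indices 0-3");
      return M < 2 ? V1[M] : V2[M - 2];
    };
    return {pick(M0), pick(M1)};
  }

  int main() {
    std::array<double, 2> A = {1.0, 2.0}, B = {3.0, 4.0};
    shuffle2(A, B, 0, 2);  // {A[0], B[0]}: the isMOVLHPSorUNPCKLPDMask shape
    shuffle2(A, B, 0, 3);  // {A[0], B[1]}: the isMOVHLPSMask shape
    shuffle2(A, B, 1, 3);  // {A[1], B[1]}: the isUNPCKHPDMask shape
    shuffle2(A, B, 1, 2);  // {A[1], B[0]}: the two-element isSHUFPMask shape
    return 0;
  }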
@@ -2244,6 +2307,7 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
     SDOperand V2 = Op.getOperand(1);
     SDOperand PermMask = Op.getOperand(2);
     MVT::ValueType VT = Op.getValueType();
+    unsigned NumElems = PermMask.getNumOperands();
 
     // Handle splat cases.
     if (X86::isSplatMask(PermMask.Val)) {
@@ -2265,8 +2329,7 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
       return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
                          DAG.getNode(ISD::UNDEF, V1.getValueType()),
                          PermMask);
-    } else if (X86::isSHUFPMask(PermMask.Val)) {
-      unsigned NumElems = PermMask.getNumOperands();
+    } else if (NumElems == 2 || X86::isSHUFPMask(PermMask.Val)) {
       SDOperand Elt = PermMask.getOperand(0);
       if (cast<ConstantSDNode>(Elt)->getValue() >= NumElems) {
         // Swap the operands and change mask.
@@ -2406,7 +2469,8 @@ bool
 X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
   // Only do shuffles on 128-bit vector types for now.
   if (MVT::getSizeInBits(VT) == 64) return false;
-  return (X86::isSplatMask(Mask.Val) ||
+  return (Mask.Val->getNumOperands() == 2 ||
+          X86::isSplatMask(Mask.Val) ||
           X86::isPSHUFDMask(Mask.Val) ||
           X86::isSHUFPMask(Mask.Val));
 }
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index d60ad39d9b6..bc4a7461d1d 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -187,6 +187,19 @@ namespace llvm {
   /// specifies a shuffle of elements that is suitable for input to SHUFP*.
   bool isSHUFPMask(SDNode *N);
 
+  /// isMOVLHPSorUNPCKLPDMask - Return true if the specified VECTOR_SHUFFLE
+  /// operand specifies a shuffle of elements that is suitable for input to
+  /// MOVLHPS or UNPCKLPD.
+  bool isMOVLHPSorUNPCKLPDMask(SDNode *N);
+
+  /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
+  /// specifies a shuffle of elements that is suitable for input to MOVHLPS.
+  bool isMOVHLPSMask(SDNode *N);
+
+  /// isUNPCKHPDMask - Return true if the specified VECTOR_SHUFFLE operand
+  /// specifies a shuffle of elements that is suitable for input to UNPCKHPD.
+  bool isUNPCKHPDMask(SDNode *N);
+
   /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand
   /// specifies a splat of a single element.
   bool isSplatMask(SDNode *N);
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index ed686313a0f..e2ec85df82f 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -59,6 +59,18 @@ def MOVLHPS_splat_mask : PatLeaf<(build_vector), [{
   return X86::isSplatMask(N);
 }]>;
 
+def MOVLHPSorUNPCKLPD_shuffle_mask : PatLeaf<(build_vector), [{
+  return X86::isMOVLHPSorUNPCKLPDMask(N);
+}], SHUFFLE_get_shuf_imm>;
+
+def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{
+  return X86::isMOVHLPSMask(N);
+}], SHUFFLE_get_shuf_imm>;
+
+def UNPCKHPD_shuffle_mask : PatLeaf<(build_vector), [{
+  return X86::isUNPCKHPDMask(N);
+}], SHUFFLE_get_shuf_imm>;
+
 // Only use PSHUF if it is not a splat.
 def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
   return !X86::isSplatMask(N) && X86::isPSHUFDMask(N);
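In the lowering change above, when the two-element path reaches the SHUFPD
code and mask element 0 refers to the second vector (value >= NumElems), the
operands are swapped and the mask is rebased, per the "Swap the operands and
change mask" comment. A minimal sketch of that rebasing under the same index
convention; commuteShuffleMask is a hypothetical helper, not code from the
patch:

  #include <vector>

  // After swapping the two shuffle operands, an index that pointed into the
  // first vector must point into the second and vice versa; adding NumElems
  // modulo 2*NumElems performs exactly that rebasing.
  static void commuteShuffleMask(std::vector<unsigned> &Mask,
                                 unsigned NumElems) {
    for (unsigned &Idx : Mask)
      Idx = (Idx + NumElems) % (2 * NumElems);
  }
  // Example: with NumElems == 2, mask {2, 1} becomes {0, 3} once the
  // operands themselves have been exchanged.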
@@ -444,6 +456,7 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
 let isTwoAddress = 1 in {
 def MOVLHPSrr : PSI<0x16, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                     "movlhps {$src2, $dst|$dst, $src2}", []>;
+
 def MOVHLPSrr : PSI<0x12, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                     "movhlps {$src2, $dst|$dst, $src2}", []>;
 }
@@ -569,11 +582,11 @@ def SUBPSrm : PSI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                                      (load addr:$src2))))]>;
 def SUBPDrr : PDI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                   "subpd {$src2, $dst|$dst, $src2}",
-                  [(set VR128:$dst, (fsub VR128:$src1, VR128:$src2))]>;
+                  [(set VR128:$dst, (v2f64 (fsub VR128:$src1, VR128:$src2)))]>;
 def SUBPDrm : PDI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                   "subpd {$src2, $dst|$dst, $src2}",
-                  [(set VR128:$dst, (fsub VR128:$src1,
-                                    (load addr:$src2)))]>;
+                  [(set VR128:$dst, (v2f64 (fsub VR128:$src1,
+                                           (load addr:$src2))))]>;
 }
 
 def SQRTPSrr : PSI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
@@ -728,7 +741,6 @@ def SHUFPDrr : PDIi8<0xC6, MRMSrcReg,
 def SHUFPDrm : PDIi8<0xC6, MRMSrcMem,
                      (ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
                      "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}", []>;
-}
 
 def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
@@ -754,6 +766,7 @@ def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
 def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
                      (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                      "unpcklpd {$src2, $dst|$dst, $src2}", []>;
+}
 
 //===----------------------------------------------------------------------===//
 // SSE integer instructions
@@ -869,6 +882,9 @@ def : Pat<(v8i16 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
       Requires<[HasSSE2]>;
 def : Pat<(v16i8 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
       Requires<[HasSSE2]>;
+// bit_convert
+def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
 
 // Splat v4f32 / v4i32
 def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), SHUFP_splat_mask:$sm),
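The relocated bit_convert patterns encode that a v4i32 <-> v4f32 bitcast is
free: both types live in a VR128 register, so the selector substitutes the
source register and emits no instruction. The same idea at the C level, as an
illustrative sketch assuming SSE2 and the standard <emmintrin.h> cast
intrinsics:

  #include <emmintrin.h>

  // Reinterpreting the same 128-bit register as integer or float lanes
  // compiles to no instruction, which is what the bitconvert patterns
  // encode for the instruction selector.
  __m128i as_v4i32(__m128 V)  { return _mm_castps_si128(V); }
  __m128  as_v4f32(__m128i V) { return _mm_castsi128_ps(V); }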
@@ -892,6 +908,29 @@ def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm),
           (v4i32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,
       Requires<[HasSSE2]>;
 
-// bit_convert
-def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
+// Shuffle v2f64 / v2i64
+def : Pat<(vector_shuffle (v2f64 VR128:$src1), (v2f64 VR128:$src2),
+           MOVLHPSorUNPCKLPD_shuffle_mask:$sm),
+          (v2f64 (MOVLHPSrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE1]>;
+def : Pat<(vector_shuffle (v2f64 VR128:$src1), (v2f64 VR128:$src2),
+           MOVHLPS_shuffle_mask:$sm),
+          (v2f64 (MOVHLPSrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE1]>;
+def : Pat<(vector_shuffle (v2f64 VR128:$src1), (v2f64 VR128:$src2),
+           UNPCKHPD_shuffle_mask:$sm),
+          (v2f64 (UNPCKHPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
+def : Pat<(vector_shuffle (v2f64 VR128:$src1), (loadv2f64 addr:$src2),
+           MOVLHPSorUNPCKLPD_shuffle_mask:$sm),
+          (v2f64 (UNPCKLPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
+
+def : Pat<(vector_shuffle (v2i64 VR128:$src1), (v2i64 VR128:$src2),
+           MOVLHPSorUNPCKLPD_shuffle_mask:$sm),
+          (v2i64 (MOVLHPSrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE1]>;
+def : Pat<(vector_shuffle (v2i64 VR128:$src1), (v2i64 VR128:$src2),
+           MOVHLPS_shuffle_mask:$sm),
+          (v2i64 (MOVHLPSrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE1]>;
+def : Pat<(vector_shuffle (v2i64 VR128:$src1), (v2i64 VR128:$src2),
+           UNPCKHPD_shuffle_mask:$sm),
+          (v2i64 (UNPCKHPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
+def : Pat<(vector_shuffle (v2i64 VR128:$src1), (loadv2i64 addr:$src2),
+           MOVLHPSorUNPCKLPD_shuffle_mask:$sm),
+          (v2i64 (UNPCKLPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
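For intuition, the data movement these new v2f64/v2i64 patterns select for
can be written with SSE2 intrinsics. This is an illustrative sketch, not
anything the patch itself contains; the patch operates on SelectionDAG
patterns. Note also the last pattern in each group: when the second shuffle
operand is a direct load, the load folds into unpcklpd's memory form
(UNPCKLPDrm) instead of being loaded into a register first.

  #include <emmintrin.h>

  // Register-register forms of the shuffles the patterns select for.
  __m128d lo_halves(__m128d A, __m128d B) {
    return _mm_unpacklo_pd(A, B);  // {A[0], B[0]} -- unpcklpd / movlhps
  }
  __m128d hi_halves(__m128d A, __m128d B) {
    return _mm_unpackhi_pd(A, B);  // {A[1], B[1]} -- unpckhpd
  }

  // When the second operand comes straight from memory, a compiler may fold
  // the load into the shuffle, mirroring the UNPCKLPDrm pattern above.
  __m128d lo_halves_mem(__m128d A, const double *P) {
    return _mm_unpacklo_pd(A, _mm_load_pd(P));
  }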