mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 11:42:57 +01:00
More efficient v2f64 shuffle using movlhps, movhlps, unpckhpd, and unpcklpd.
llvm-svn: 27040
This commit is contained in:
parent
184c4b937e
commit
3028b04057
@ -1398,8 +1398,19 @@ bool X86::isSHUFPMask(SDNode *N) {
|
|||||||
assert(N->getOpcode() == ISD::BUILD_VECTOR);
|
assert(N->getOpcode() == ISD::BUILD_VECTOR);
|
||||||
|
|
||||||
unsigned NumOperands = N->getNumOperands();
|
unsigned NumOperands = N->getNumOperands();
|
||||||
if (NumOperands != 2 && NumOperands != 4)
|
if (NumOperands == 2) {
|
||||||
return false;
|
// The only case that ought to be handled by SHUFPD is
|
||||||
|
// Dest { 2, 1 } <= shuffle( Dest { 1, 0 }, Src { 3, 2 } )
|
||||||
|
// Expect bit 0 == 1, bit1 == 2
|
||||||
|
SDOperand Bit0 = N->getOperand(0);
|
||||||
|
SDOperand Bit1 = N->getOperand(1);
|
||||||
|
assert(isa<ConstantSDNode>(Bit0) && isa<ConstantSDNode>(Bit1) &&
|
||||||
|
"Invalid VECTOR_SHUFFLE mask!");
|
||||||
|
return (cast<ConstantSDNode>(Bit0)->getValue() == 1 &&
|
||||||
|
cast<ConstantSDNode>(Bit1)->getValue() == 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (NumOperands != 4) return false;
|
||||||
|
|
||||||
// Each half must refer to only one of the vectors.
|
// Each half must refer to only one of the vectors.
|
||||||
SDOperand Elt = N->getOperand(0);
|
SDOperand Elt = N->getOperand(0);
|
||||||
@ -1424,6 +1435,58 @@ bool X86::isSHUFPMask(SDNode *N) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// isMOVLHPSorUNPCKLPDMask - Return true if N, the BUILD_VECTOR mask operand of
|
||||||
|
/// a VECTOR_SHUFFLE, has exactly two constant elements equal to <0, 2>, the
|
||||||
|
/// mask matched to MOVLHPS or UNPCKLPD. Any other element count returns false.
|
||||||
|
bool X86::isMOVLHPSorUNPCKLPDMask(SDNode *N) {
|
||||||
|
assert(N->getOpcode() == ISD::BUILD_VECTOR);
|
||||||
|
|
||||||
|
if (N->getNumOperands() != 2)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Accept only the mask <0, 2> (element 0 == 0, element 1 == 2).
|
||||||
|
SDOperand Bit0 = N->getOperand(0);
|
||||||
|
SDOperand Bit1 = N->getOperand(1);
|
||||||
|
assert(isa<ConstantSDNode>(Bit0) && isa<ConstantSDNode>(Bit1) &&
|
||||||
|
"Invalid VECTOR_SHUFFLE mask!");
|
||||||
|
return (cast<ConstantSDNode>(Bit0)->getValue() == 0 &&
|
||||||
|
cast<ConstantSDNode>(Bit1)->getValue() == 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// isMOVHLPSMask - Return true if N, the BUILD_VECTOR mask operand of a
|
||||||
|
/// VECTOR_SHUFFLE, is the two-element constant mask <0, 3> (matched to MOVHLPS).
|
||||||
|
bool X86::isMOVHLPSMask(SDNode *N) {
|
||||||
|
assert(N->getOpcode() == ISD::BUILD_VECTOR);
|
||||||
|
|
||||||
|
if (N->getNumOperands() != 2)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Accept only <0, 3>. NOTE(review): MOVHLPS copies the source's high half into the destination's low half, which looks like <3, 1> rather than <0, 3> - confirm.
|
||||||
|
SDOperand Bit0 = N->getOperand(0);
|
||||||
|
SDOperand Bit1 = N->getOperand(1);
|
||||||
|
assert(isa<ConstantSDNode>(Bit0) && isa<ConstantSDNode>(Bit1) &&
|
||||||
|
"Invalid VECTOR_SHUFFLE mask!");
|
||||||
|
return (cast<ConstantSDNode>(Bit0)->getValue() == 0 &&
|
||||||
|
cast<ConstantSDNode>(Bit1)->getValue() == 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// isUNPCKHPDMask - Return true if N, the BUILD_VECTOR mask operand of a
|
||||||
|
/// VECTOR_SHUFFLE, is the two-element constant mask <1, 3> (matched to UNPCKHPD).
|
||||||
|
bool X86::isUNPCKHPDMask(SDNode *N) {
|
||||||
|
assert(N->getOpcode() == ISD::BUILD_VECTOR);
|
||||||
|
|
||||||
|
if (N->getNumOperands() != 2)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Accept only the mask <1, 3> (element 0 == 1, element 1 == 3).
|
||||||
|
SDOperand Bit0 = N->getOperand(0);
|
||||||
|
SDOperand Bit1 = N->getOperand(1);
|
||||||
|
assert(isa<ConstantSDNode>(Bit0) && isa<ConstantSDNode>(Bit1) &&
|
||||||
|
"Invalid VECTOR_SHUFFLE mask!");
|
||||||
|
return (cast<ConstantSDNode>(Bit0)->getValue() == 1 &&
|
||||||
|
cast<ConstantSDNode>(Bit1)->getValue() == 3);
|
||||||
|
}
|
||||||
|
|
||||||
/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
|
/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
|
||||||
/// a splat of a single element.
|
/// a splat of a single element.
|
||||||
bool X86::isSplatMask(SDNode *N) {
|
bool X86::isSplatMask(SDNode *N) {
|
||||||
@ -2244,6 +2307,7 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
|
|||||||
SDOperand V2 = Op.getOperand(1);
|
SDOperand V2 = Op.getOperand(1);
|
||||||
SDOperand PermMask = Op.getOperand(2);
|
SDOperand PermMask = Op.getOperand(2);
|
||||||
MVT::ValueType VT = Op.getValueType();
|
MVT::ValueType VT = Op.getValueType();
|
||||||
|
unsigned NumElems = PermMask.getNumOperands();
|
||||||
|
|
||||||
// Handle splat cases.
|
// Handle splat cases.
|
||||||
if (X86::isSplatMask(PermMask.Val)) {
|
if (X86::isSplatMask(PermMask.Val)) {
|
||||||
@ -2265,8 +2329,7 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
|
|||||||
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
|
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
|
||||||
DAG.getNode(ISD::UNDEF, V1.getValueType()),
|
DAG.getNode(ISD::UNDEF, V1.getValueType()),
|
||||||
PermMask);
|
PermMask);
|
||||||
} else if (X86::isSHUFPMask(PermMask.Val)) {
|
} else if (NumElems == 2 || X86::isSHUFPMask(PermMask.Val)) {
|
||||||
unsigned NumElems = PermMask.getNumOperands();
|
|
||||||
SDOperand Elt = PermMask.getOperand(0);
|
SDOperand Elt = PermMask.getOperand(0);
|
||||||
if (cast<ConstantSDNode>(Elt)->getValue() >= NumElems) {
|
if (cast<ConstantSDNode>(Elt)->getValue() >= NumElems) {
|
||||||
// Swap the operands and change mask.
|
// Swap the operands and change mask.
|
||||||
@ -2406,7 +2469,8 @@ bool
|
|||||||
X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
|
X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
|
||||||
// Only do shuffles on 128-bit vector types for now.
|
// Only do shuffles on 128-bit vector types for now.
|
||||||
if (MVT::getSizeInBits(VT) == 64) return false;
|
if (MVT::getSizeInBits(VT) == 64) return false;
|
||||||
return (X86::isSplatMask(Mask.Val) ||
|
return (Mask.Val->getNumOperands() == 2 ||
|
||||||
|
X86::isSplatMask(Mask.Val) ||
|
||||||
X86::isPSHUFDMask(Mask.Val) ||
|
X86::isPSHUFDMask(Mask.Val) ||
|
||||||
X86::isSHUFPMask(Mask.Val));
|
X86::isSHUFPMask(Mask.Val));
|
||||||
}
|
}
|
||||||
|
@ -187,6 +187,19 @@ namespace llvm {
|
|||||||
/// specifies a shuffle of elements that is suitable for input to SHUFP*.
|
/// specifies a shuffle of elements that is suitable for input to SHUFP*.
|
||||||
bool isSHUFPMask(SDNode *N);
|
bool isSHUFPMask(SDNode *N);
|
||||||
|
|
||||||
|
/// isMOVLHPSorUNPCKLPDMask - Return true if the specified VECTOR_SHUFFLE
|
||||||
|
/// operand specifies a shuffle of elements that is suitable for input to
|
||||||
|
/// MOVLHPS or UNPCKLPD.
|
||||||
|
bool isMOVLHPSorUNPCKLPDMask(SDNode *N);
|
||||||
|
|
||||||
|
/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||||
|
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
|
||||||
|
bool isMOVHLPSMask(SDNode *N);
|
||||||
|
|
||||||
|
/// isUNPCKHPDMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||||
|
/// specifies a shuffle of elements that is suitable for input to UNPCKHPD.
|
||||||
|
bool isUNPCKHPDMask(SDNode *N);
|
||||||
|
|
||||||
/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand
|
/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||||
/// specifies a splat of a single element.
|
/// specifies a splat of a single element.
|
||||||
bool isSplatMask(SDNode *N);
|
bool isSplatMask(SDNode *N);
|
||||||
|
@ -59,6 +59,18 @@ def MOVLHPS_splat_mask : PatLeaf<(build_vector), [{
|
|||||||
return X86::isSplatMask(N);
|
return X86::isSplatMask(N);
|
||||||
}]>;
|
}]>;
|
||||||
|
|
||||||
|
// Matches a build_vector shuffle mask that X86::isMOVLHPSorUNPCKLPDMask accepts.
def MOVLHPSorUNPCKLPD_shuffle_mask : PatLeaf<(build_vector), [{
|
||||||
|
return X86::isMOVLHPSorUNPCKLPDMask(N);
|
||||||
|
}], SHUFFLE_get_shuf_imm>;
|
||||||
|
|
||||||
|
// Matches a build_vector shuffle mask that X86::isMOVHLPSMask accepts.
def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{
|
||||||
|
return X86::isMOVHLPSMask(N);
|
||||||
|
}], SHUFFLE_get_shuf_imm>;
|
||||||
|
|
||||||
|
// Matches a build_vector shuffle mask that X86::isUNPCKHPDMask accepts.
def UNPCKHPD_shuffle_mask : PatLeaf<(build_vector), [{
|
||||||
|
return X86::isUNPCKHPDMask(N);
|
||||||
|
}], SHUFFLE_get_shuf_imm>;
|
||||||
|
|
||||||
// Only use PSHUF if it is not a splat.
|
// Only use PSHUF if it is not a splat.
|
||||||
def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
|
def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
|
||||||
return !X86::isSplatMask(N) && X86::isPSHUFDMask(N);
|
return !X86::isSplatMask(N) && X86::isPSHUFDMask(N);
|
||||||
@ -444,6 +456,7 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
|
|||||||
let isTwoAddress = 1 in {
|
let isTwoAddress = 1 in {
|
||||||
def MOVLHPSrr : PSI<0x16, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
def MOVLHPSrr : PSI<0x16, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||||
"movlhps {$src2, $dst|$dst, $src2}", []>;
|
"movlhps {$src2, $dst|$dst, $src2}", []>;
|
||||||
|
|
||||||
// MOVHLPSrr - register-to-register MOVHLPS (opcode 0x12); no selection pattern is attached to the def itself.
|
// MOVHLPSrr - register-to-register MOVHLPS (opcode 0x12); no selection pattern is attached to the def itself.
|
||||||
def MOVHLPSrr : PSI<0x12, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
def MOVHLPSrr : PSI<0x12, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||||
"movhlps {$src2, $dst|$dst, $src2}", []>;
|
"movhlps {$src2, $dst|$dst, $src2}", []>;
|
||||||
}
|
}
|
||||||
@ -569,11 +582,11 @@ def SUBPSrm : PSI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
|
|||||||
(load addr:$src2))))]>;
|
(load addr:$src2))))]>;
|
||||||
def SUBPDrr : PDI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
def SUBPDrr : PDI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||||
"subpd {$src2, $dst|$dst, $src2}",
|
"subpd {$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst, (fsub VR128:$src1, VR128:$src2))]>;
|
[(set VR128:$dst, (v2f64 (fsub VR128:$src1, VR128:$src2)))]>;
|
||||||
def SUBPDrm : PDI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
|
def SUBPDrm : PDI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
|
||||||
"subpd {$src2, $dst|$dst, $src2}",
|
"subpd {$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst, (fsub VR128:$src1,
|
[(set VR128:$dst, (v2f64 (fsub VR128:$src1,
|
||||||
(load addr:$src2)))]>;
|
(load addr:$src2))))]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
def SQRTPSrr : PSI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
|
def SQRTPSrr : PSI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
|
||||||
@ -728,7 +741,6 @@ def SHUFPDrr : PDIi8<0xC6, MRMSrcReg,
|
|||||||
def SHUFPDrm : PDIi8<0xC6, MRMSrcMem,
|
def SHUFPDrm : PDIi8<0xC6, MRMSrcMem,
|
||||||
(ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
|
(ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
|
||||||
"shufpd {$src3, $src2, $dst|$dst, $src2, $src3}", []>;
|
"shufpd {$src3, $src2, $dst|$dst, $src2, $src3}", []>;
|
||||||
}
|
|
||||||
|
|
||||||
def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
|
def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
|
||||||
(ops VR128:$dst, VR128:$src1, VR128:$src2),
|
(ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||||
@ -754,6 +766,7 @@ def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
|
|||||||
def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
|
def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
|
||||||
(ops VR128:$dst, VR128:$src1, f128mem:$src2),
|
(ops VR128:$dst, VR128:$src1, f128mem:$src2),
|
||||||
"unpcklpd {$src2, $dst|$dst, $src2}", []>;
|
"unpcklpd {$src2, $dst|$dst, $src2}", []>;
|
||||||
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// SSE integer instructions
|
// SSE integer instructions
|
||||||
@ -869,6 +882,9 @@ def : Pat<(v8i16 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
|
|||||||
def : Pat<(v16i8 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
|
def : Pat<(v16i8 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
|
||||||
Requires<[HasSSE2]>;
|
Requires<[HasSSE2]>;
|
||||||
|
|
||||||
|
// bit_convert
|
||||||
|
def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
|
||||||
|
def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
|
||||||
|
|
||||||
// Splat v4f32 / v4i32
|
// Splat v4f32 / v4i32
|
||||||
def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), SHUFP_splat_mask:$sm),
|
def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), SHUFP_splat_mask:$sm),
|
||||||
@ -892,6 +908,29 @@ def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm),
|
|||||||
(v4i32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,
|
(v4i32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,
|
||||||
Requires<[HasSSE2]>;
|
Requires<[HasSSE2]>;
|
||||||
|
|
||||||
// bit_convert
|
// Shuffle v2f64 / v2i64
|
||||||
def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
|
def : Pat<(vector_shuffle (v2f64 VR128:$src1), (v2f64 VR128:$src2),
|
||||||
def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
|
MOVLHPSorUNPCKLPD_shuffle_mask:$sm),
|
||||||
|
(v2f64 (MOVLHPSrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE1]>;
|
||||||
|
def : Pat<(vector_shuffle (v2f64 VR128:$src1), (v2f64 VR128:$src2),
|
||||||
|
MOVHLPS_shuffle_mask:$sm),
|
||||||
|
(v2f64 (MOVHLPSrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE1]>;
|
||||||
|
def : Pat<(vector_shuffle (v2f64 VR128:$src1), (v2f64 VR128:$src2),
|
||||||
|
UNPCKHPD_shuffle_mask:$sm),
|
||||||
|
(v2f64 (UNPCKHPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
|
||||||
|
def : Pat<(vector_shuffle (v2f64 VR128:$src1), (loadv2f64 addr:$src2),
|
||||||
|
MOVLHPSorUNPCKLPD_shuffle_mask:$sm),
|
||||||
|
(v2f64 (UNPCKLPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
|
||||||
|
|
||||||
|
def : Pat<(vector_shuffle (v2i64 VR128:$src1), (v2i64 VR128:$src2),
|
||||||
|
MOVLHPSorUNPCKLPD_shuffle_mask:$sm),
|
||||||
|
(v2i64 (MOVLHPSrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE1]>;
|
||||||
|
def : Pat<(vector_shuffle (v2i64 VR128:$src1), (v2i64 VR128:$src2),
|
||||||
|
MOVHLPS_shuffle_mask:$sm),
|
||||||
|
(v2i64 (MOVHLPSrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE1]>;
|
||||||
|
def : Pat<(vector_shuffle (v2i64 VR128:$src1), (v2i64 VR128:$src2),
|
||||||
|
UNPCKHPD_shuffle_mask:$sm),
|
||||||
|
(v2i64 (UNPCKHPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
|
||||||
|
def : Pat<(vector_shuffle (v2i64 VR128:$src1), (loadv2i64 addr:$src2),
|
||||||
|
MOVLHPSorUNPCKLPD_shuffle_mask:$sm),
|
||||||
|
(v2i64 (UNPCKLPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
|
||||||
|
Loading…
Reference in New Issue
Block a user