Handle a few more cases of folding load i64 into xmm and zero top bits.

Note: some of this code will be moved into the target-independent part of the DAG combiner in a subsequent patch.

llvm-svn: 50918
Commit: 3493e43afd
Parent: f87942325f
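As a rough illustration (not part of the commit itself), the kind of IR this change targets on 32-bit x86 looks like the hypothetical function below, which mirrors the new test added at the end of this diff; the function and parameter names are invented for the sketch. The intent is that a BUILD_VECTOR of (load i64, 0) is combined into an X86ISD::VZEXT_LOAD node before the legalizer expands the i64 load, and then selected as a single zero-extending movq load.

; Hedged sketch (hypothetical, not taken from the commit): load an i64 and
; place it in element 0 of an otherwise-zero vector.
define <2 x i64> @load_i64_into_zero_xmm(i64* %p) nounwind {
  %x = load i64* %p
  %v = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
  ret <2 x i64> %v
}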
@@ -975,7 +975,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDOperand Op, SDOperand Pred,
   // Also handle the case where we explicitly require zeros in the top
   // elements. This is a vector shuffle from the zero vector.
-  if (N.getOpcode() == X86ISD::ZEXT_VMOVL && N.Val->hasOneUse() &&
+  if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.Val->hasOneUse() &&
       // Check to see if the top elements are all zeros (or bitcast of zeros).
       N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
       N.getOperand(0).Val->hasOneUse() &&
@@ -715,6 +715,7 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
   // We have target-specific dag combine patterns for the following nodes:
   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+  setTargetDAGCombine(ISD::BUILD_VECTOR);
   setTargetDAGCombine(ISD::SELECT);
   setTargetDAGCombine(ISD::STORE);

@@ -3481,9 +3482,9 @@ SDOperand RewriteAsNarrowerShuffle(SDOperand V1, SDOperand V2,
                                 &MaskVec[0], MaskVec.size()));
 }

-/// getZextVMoveL - Return a zero-extending vector move low node.
+/// getVZextMovL - Return a zero-extending vector move low node.
 ///
-static SDOperand getZextVMoveL(MVT::ValueType VT, MVT::ValueType OpVT,
+static SDOperand getVZextMovL(MVT::ValueType VT, MVT::ValueType OpVT,
                               SDOperand SrcOp, SelectionDAG &DAG,
                               const X86Subtarget *Subtarget) {
   if (VT == MVT::v2f64 || VT == MVT::v4f32) {
@@ -3501,7 +3502,7 @@ static SDOperand getZextVMoveL(MVT::ValueType VT, MVT::ValueType OpVT,
       // PR2108
       OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32;
       return DAG.getNode(ISD::BIT_CONVERT, VT,
-                         DAG.getNode(X86ISD::ZEXT_VMOVL, OpVT,
+                         DAG.getNode(X86ISD::VZEXT_MOVL, OpVT,
                                      DAG.getNode(ISD::SCALAR_TO_VECTOR, OpVT,
                                                  SrcOp.getOperand(0).getOperand(0))));
     }
@@ -3509,7 +3510,7 @@ static SDOperand getZextVMoveL(MVT::ValueType VT, MVT::ValueType OpVT,
   }

   return DAG.getNode(ISD::BIT_CONVERT, VT,
-                     DAG.getNode(X86ISD::ZEXT_VMOVL, OpVT,
+                     DAG.getNode(X86ISD::VZEXT_MOVL, OpVT,
                                  DAG.getNode(ISD::BIT_CONVERT, OpVT, SrcOp)));
 }

@@ -3561,14 +3562,14 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
       SDOperand NewMask = NewOp.getOperand(2);
       if (isCommutedMOVL(NewMask.Val, true, false)) {
         NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask, DAG);
-        return getZextVMoveL(VT, NewOp.getValueType(), NewV2, DAG, Subtarget);
+        return getVZextMovL(VT, NewOp.getValueType(), NewV2, DAG, Subtarget);
       }
     }
   } else if (ISD::isBuildVectorAllZeros(V1.Val)) {
     SDOperand NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask,
                                               DAG, *this);
     if (NewOp.Val && X86::isMOVLMask(NewOp.getOperand(2).Val))
-      return getZextVMoveL(VT, NewOp.getValueType(), NewOp.getOperand(1),
+      return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1),
                           DAG, Subtarget);
   }
 }
@@ -3577,7 +3578,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
   if (V1IsUndef)
     return V2;
   if (ISD::isBuildVectorAllZeros(V1.Val))
-    return getZextVMoveL(VT, VT, V2, DAG, Subtarget);
+    return getVZextMovL(VT, VT, V2, DAG, Subtarget);
   return Op;
 }

@@ -5675,7 +5676,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m";
   case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG";
   case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG";
-  case X86ISD::ZEXT_VMOVL: return "X86ISD::ZEXT_VMOVL";
+  case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
+  case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
   }
 }

@@ -6302,6 +6304,55 @@ static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
                        LD->getAlignment());
 }

+static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
+  SDOperand Elt = N->getOperand(i);
+  if (Elt.getOpcode() != ISD::MERGE_VALUES)
+    return Elt.Val;
+  return Elt.getOperand(Elt.ResNo).Val;
+}
+
+static SDOperand PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG,
+                                           const X86Subtarget *Subtarget) {
+  // Ignore single operand BUILD_VECTOR.
+  if (N->getNumOperands() == 1)
+    return SDOperand();
+
+  MVT::ValueType VT = N->getValueType(0);
+  MVT::ValueType EVT = MVT::getVectorElementType(VT);
+  if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit())
+    // We are looking for load i64 and zero extend. We want to transform
+    // it before legalizer has a chance to expand it. Also look for i64
+    // BUILD_PAIR bit casted to f64.
+    return SDOperand();
+  // This must be an insertion into a zero vector.
+  SDOperand HighElt = N->getOperand(1);
+  if (HighElt.getOpcode() != ISD::UNDEF &&
+      !isZeroNode(HighElt))
+    return SDOperand();
+
+  // Value must be a load.
+  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+  SDNode *Base = N->getOperand(0).Val;
+  if (!isa<LoadSDNode>(Base)) {
+    if (Base->getOpcode() == ISD::BIT_CONVERT)
+      Base = Base->getOperand(0).Val;
+    if (Base->getOpcode() != ISD::BUILD_PAIR)
+      return SDOperand();
+    SDNode *Pair = Base;
+    Base = getBuildPairElt(Pair, 0);
+    if (!ISD::isNON_EXTLoad(Base))
+      return SDOperand();
+    SDNode *NextLD = getBuildPairElt(Pair, 1);
+    if (!ISD::isNON_EXTLoad(NextLD) ||
+        !isConsecutiveLoad(NextLD, Base, 1, 4/*32 bits*/, MFI))
+      return SDOperand();
+  }
+  LoadSDNode *LD = cast<LoadSDNode>(Base);
+
+  // Transform it into VZEXT_LOAD addr.
+  return DAG.getNode(X86ISD::VZEXT_LOAD, VT, LD->getChain(), LD->getBasePtr());
+}
+
 /// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
 static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
                                       const X86Subtarget *Subtarget) {
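The BUILD_PAIR path above covers i64 values that are not plain loads, for example an i64 argument on x86-32 whose two 32-bit halves typically reach the DAG as a pair of consecutive loads from the argument area; when such a value is bitcast to f64 and placed in the low lane of an otherwise-zero vector, the combine can still fold the whole thing into one VZEXT_LOAD. A rough, hypothetical sketch of that shape (close to @t2 in the new vec_set-F.ll test below; the function name is invented):

; Hedged sketch, not taken from the commit's tests: an i64 reinterpreted as a
; double and inserted into the low element of a zeroed <2 x double>. On x86-32
; the argument's halves show up as a BUILD_PAIR of stack loads, which the code
; above walks via getBuildPairElt and isConsecutiveLoad.
define <2 x double> @i64_as_low_f64(i64 %x) nounwind {
  %d  = bitcast i64 %x to double
  %v0 = insertelement <2 x double> undef, double %d, i32 0
  %v1 = insertelement <2 x double> %v0, double 0.000000e+00, i32 1
  ret <2 x double> %v1
}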
@@ -6498,6 +6549,7 @@ SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N,
   switch (N->getOpcode()) {
   default: break;
   case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, Subtarget);
+  case ISD::BUILD_VECTOR: return PerformBuildVectorCombine(N, DAG, Subtarget);
   case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
   case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
   case X86ISD::FXOR:
@@ -201,8 +201,11 @@ namespace llvm {
       // FNSTCW16m - Store FP control world into i16 memory.
       FNSTCW16m,

-      // ZEXT_VMOVL - Vector move low and zero extend.
-      ZEXT_VMOVL
+      // VZEXT_MOVL - Vector move low and zero extend.
+      VZEXT_MOVL,
+
+      // VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
+      VZEXT_LOAD
   };
 }

@@ -201,12 +201,12 @@ let AddedComplexity = 15 in
 def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
                             "movd\t{$src, $dst|$dst, $src}",
                             [(set VR64:$dst,
-                                  (v2i32 (X86zvmovl (v2i32 (scalar_to_vector GR32:$src)))))]>;
+                                  (v2i32 (X86vzmovl (v2i32 (scalar_to_vector GR32:$src)))))]>;
 let AddedComplexity = 20 in
 def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
                             "movd\t{$src, $dst|$dst, $src}",
                             [(set VR64:$dst,
-                                  (v2i32 (X86zvmovl (v2i32
+                                  (v2i32 (X86vzmovl (v2i32
                                     (scalar_to_vector (loadi32 addr:$src))))))]>;

 // Arithmetic Instructions
@@ -560,9 +560,9 @@ def : Pat<(i64 (bitconvert (v8i8 VR64:$src))),
 // Move scalar to XMM zero-extended
 // movd to XMM register zero-extends
 let AddedComplexity = 15 in {
-  def : Pat<(v8i8 (X86zvmovl (bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))))),
+  def : Pat<(v8i8 (X86vzmovl (bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))))),
             (MMX_MOVZDI2PDIrr GR32:$src)>;
-  def : Pat<(v4i16 (X86zvmovl (bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))))),
+  def : Pat<(v4i16 (X86vzmovl (bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))))),
            (MMX_MOVZDI2PDIrr GR32:$src)>;
 }

@@ -47,7 +47,10 @@ def X86pinsrw : SDNode<"X86ISD::PINSRW",
 def X86insrtps : SDNode<"X86ISD::INSERTPS",
                  SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
                                       SDTCisVT<2, f32>, SDTCisPtrTy<3>]>>;
-def X86zvmovl : SDNode<"X86ISD::ZEXT_VMOVL", SDTUnaryOp>;
+def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
+                 SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
+def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
+                 [SDNPHasChain, SDNPMayLoad]>;

 //===----------------------------------------------------------------------===//
 // SSE Complex Patterns
@@ -1008,10 +1011,10 @@ let neverHasSideEffects = 1 in
 let AddedComplexity = 20 in
 def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src),
                    "movss\t{$src, $dst|$dst, $src}",
-                   [(set VR128:$dst, (v4f32 (X86zvmovl (v4f32 (scalar_to_vector
+                   [(set VR128:$dst, (v4f32 (X86vzmovl (v4f32 (scalar_to_vector
                                                  (loadf32 addr:$src))))))]>;

-def : Pat<(v4f32 (X86zvmovl (memopv4f32 addr:$src))),
+def : Pat<(v4f32 (X86vzmovl (memopv4f32 addr:$src))),
           (MOVZSS2PSrm addr:$src)>;

 //===----------------------------------------------------------------------===//
@@ -2266,22 +2269,23 @@ let AddedComplexity = 20 in
 def MOVZSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                    "movsd\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst,
-                     (v2f64 (X86zvmovl (v2f64 (scalar_to_vector
+                     (v2f64 (X86vzmovl (v2f64 (scalar_to_vector
                                        (loadf64 addr:$src))))))]>;

-def : Pat<(v2f64 (X86zvmovl (memopv2f64 addr:$src))),
+def : Pat<(v2f64 (X86vzmovl (memopv2f64 addr:$src))),
           (MOVZSD2PDrm addr:$src)>;
+def : Pat<(v2f64 (X86vzload addr:$src)), (MOVZSD2PDrm addr:$src)>;

 // movd / movq to XMM register zero-extends
 let AddedComplexity = 15 in {
 def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
                        "movd\t{$src, $dst|$dst, $src}",
-                       [(set VR128:$dst, (v4i32 (X86zvmovl
+                       [(set VR128:$dst, (v4i32 (X86vzmovl
                                       (v4i32 (scalar_to_vector GR32:$src)))))]>;
 // This is X86-64 only.
 def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
                         "mov{d|q}\t{$src, $dst|$dst, $src}",
-                        [(set VR128:$dst, (v2i64 (X86zvmovl
+                        [(set VR128:$dst, (v2i64 (X86vzmovl
                                        (v2i64 (scalar_to_vector GR64:$src)))))]>;
 }

@@ -2289,28 +2293,30 @@ let AddedComplexity = 20 in {
 def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
-                         (v4i32 (X86zvmovl (v4i32 (scalar_to_vector
+                         (v4i32 (X86vzmovl (v4i32 (scalar_to_vector
                                            (loadi32 addr:$src))))))]>;
 def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                      "movq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
-                       (v2i64 (X86zvmovl (v2i64 (scalar_to_vector
+                       (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
                                          (loadi64 addr:$src))))))]>, XS,
                    Requires<[HasSSE2]>;
 }

+def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>;
+
 // Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in
 // IA32 document. movq xmm1, xmm2 does clear the high bits.
 let AddedComplexity = 15 in
 def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "movq\t{$src, $dst|$dst, $src}",
-                        [(set VR128:$dst, (v2i64 (X86zvmovl (v2i64 VR128:$src))))]>,
+                        [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
                         XS, Requires<[HasSSE2]>;

 let AddedComplexity = 20 in
 def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                         "movq\t{$src, $dst|$dst, $src}",
-                        [(set VR128:$dst, (v2i64 (X86zvmovl
+                        [(set VR128:$dst, (v2i64 (X86vzmovl
                                                   (memopv2i64 addr:$src))))]>,
                         XS, Requires<[HasSSE2]>;

@@ -2758,9 +2764,9 @@ let Predicates = [HasSSE2] in {
 // movd to XMM register zero-extends
 let AddedComplexity = 15 in {
 // Zeroing a VR128 then do a MOVS{S|D} to the lower bits.
-def : Pat<(v2f64 (X86zvmovl (v2f64 (scalar_to_vector FR64:$src)))),
+def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
           (MOVLSD2PDrr (V_SET0), FR64:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v4f32 (X86zvmovl (v4f32 (scalar_to_vector FR32:$src)))),
+def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
           (MOVLSS2PSrr (V_SET0), FR32:$src)>, Requires<[HasSSE2]>;
 }

@@ -2916,7 +2922,7 @@ let AddedComplexity = 15 in
 def : Pat<(v2f64 (vector_shuffle immAllZerosV_bc, VR128:$src,
                   MOVL_shuffle_mask)),
           (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v2f64 (X86zvmovl (v2f64 VR128:$src))),
+def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
           (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;

 // FIXME: Temporary workaround since 2-wide shuffle is broken.
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movq
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep mov | count 1
 ; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 | grep movd

 define <2 x i64> @t1(i64 %x) nounwind {
test/CodeGen/X86/vec_set-F.ll (new file, 19 lines)
@@ -0,0 +1,19 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movq
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movsd
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep mov | count 3
+
+define <2 x i64> @t1(<2 x i64>* %ptr) nounwind {
+  %tmp45 = bitcast <2 x i64>* %ptr to <2 x i32>*
+  %tmp615 = load <2 x i32>* %tmp45
+  %tmp7 = bitcast <2 x i32> %tmp615 to i64
+  %tmp8 = insertelement <2 x i64> zeroinitializer, i64 %tmp7, i32 0
+  ret <2 x i64> %tmp8
+}
+
+define <2 x i64> @t2(i64 %x) nounwind {
+  %tmp717 = bitcast i64 %x to double
+  %tmp8 = insertelement <2 x double> undef, double %tmp717, i32 0
+  %tmp9 = insertelement <2 x double> %tmp8, double 0.000000e+00, i32 1
+  %tmp11 = bitcast <2 x double> %tmp9 to <2 x i64>
+  ret <2 x i64> %tmp11
+}