1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 05:01:59 +01:00

Lower some x86 shuffle sequences to the vblend family of instructions.

llvm-svn: 154313
This commit is contained in:
Nadav Rotem 2012-04-09 08:33:21 +00:00
parent 756a33b1e6
commit 9f7f17826e
This commit changed 2 files, with 107 lines added and 0 deleted.

View File

@ -5377,6 +5377,69 @@ X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
return LowerAVXCONCAT_VECTORS(Op, DAG);
}
/// LowerVECTOR_SHUFFLEtoBlend - Try to lower a shuffle node into a single
/// blend (ISD::VSELECT) node. This succeeds only when every result element
/// is taken from the *same lane* of either V1 or V2 — i.e. mask[i] is i,
/// i + MaskSize, or undef — so the shuffle is a pure per-element select
/// with no cross-lane movement. Returns SDValue() when the shuffle is not
/// a blend or the target lacks the required features.
static SDValue LowerVECTOR_SHUFFLEtoBlend(SDValue Op,
                                          const X86Subtarget *Subtarget,
                                          SelectionDAG &DAG, EVT PtrTy) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
  SDValue V1 = SVOp->getOperand(0);
  SDValue V2 = SVOp->getOperand(1);
  DebugLoc dl = SVOp->getDebugLoc();
  LLVMContext *Context = DAG.getContext();
  EVT VT = Op.getValueType();
  EVT InVT = V1.getValueType();
  EVT EltVT = VT.getVectorElementType();
  unsigned EltSize = EltVT.getSizeInBits();
  int MaskSize = VT.getVectorNumElements();
  int InSize = InVT.getVectorNumElements();

  // TODO: At the moment we only use AVX blends. We could also use SSE4 blends.
  if (!Subtarget->hasAVX())
    return SDValue();
  // A blend needs the result and the inputs to have the same element count.
  if (MaskSize != InSize)
    return SDValue();

  // Build the VSELECT condition vector: all-ones selects the element from
  // V1, zero selects it from V2. Undef lanes may come from either operand;
  // arbitrarily take them from V1.
  SmallVector<Constant*,2> MaskVals;
  ConstantInt *Zero = ConstantInt::get(*Context, APInt(EltSize, 0));
  ConstantInt *NegOne = ConstantInt::get(*Context, APInt(EltSize, -1));

  for (int i = 0; i < MaskSize; ++i) {
    int EltIdx = SVOp->getMaskElt(i);
    if (EltIdx == i || EltIdx == -1)
      MaskVals.push_back(NegOne);                // lane i of V1 (or undef)
    else if (EltIdx == (i + MaskSize))
      MaskVals.push_back(Zero);                  // lane i of V2
    else
      return SDValue();                          // cross-lane: not a blend
  }

  // Materialize the condition vector through the constant pool and load it.
  Constant *MaskC = ConstantVector::get(MaskVals);
  EVT MaskTy = EVT::getEVT(MaskC->getType());
  assert(MaskTy.getSizeInBits() == VT.getSizeInBits() && "Invalid mask size");
  SDValue MaskIdx = DAG.getConstantPool(MaskC, PtrTy);
  unsigned Alignment = cast<ConstantPoolSDNode>(MaskIdx)->getAlignment();
  SDValue Mask = DAG.getLoad(MaskTy, dl, DAG.getEntryNode(), MaskIdx,
                             MachinePointerInfo::getConstantPool(),
                             false, false, false, Alignment);

  // AVX is already known to be available (checked above), so only the mask
  // type needs validating here; v32i8 additionally requires AVX2.
  switch (MaskTy.getSimpleVT().SimpleTy) {
  default: return SDValue();
  case MVT::v32i8:
    if (!Subtarget->hasAVX2())
      return SDValue();
    // FALL THROUGH.
  case MVT::v16i8:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v8i32:
  case MVT::v4i64:
    return DAG.getNode(ISD::VSELECT, dl, VT, Mask, V1, V2);
  }
}
// v8i16 shuffles - Prefer shuffles in the following order:
// 1. [all] pshuflw, pshufhw, optional move
// 2. [ssse3] 1 x pshufb
@ -6539,6 +6602,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1,
V2, getShuffleVPERM2X128Immediate(SVOp), DAG);
SDValue BlendOp = LowerVECTOR_SHUFFLEtoBlend(Op, Subtarget, DAG, getPointerTy());
if (BlendOp.getNode())
return BlendOp;
//===--------------------------------------------------------------------===//
// Since no target specific shuffle was selected for this generic one,
// lower it into other known shuffles. FIXME: this isn't true yet, but

View File

@ -162,3 +162,43 @@ i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32
62>
ret <32 x i8> %0
}
; Lane 3 comes from %b (index 7), lanes 0-2 from %a: a pure per-lane select,
; so it must lower to a single variable blend rather than a shuffle sequence.
; CHECK: blend1
; CHECK: vblendvps
; CHECK: ret
define <4 x i32> @blend1(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
  %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x i32> %t
}
; Alternating lanes from %a (0, 2) and %b (5, 7) — each output lane i takes
; element i or i+4, so this is still blendable into one vblendvps.
; CHECK: blend2
; CHECK: vblendvps
; CHECK: ret
define <4 x i32> @blend2(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
  %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %t
}
; Same mask as blend2 but on <4 x float>: the blend lowering should apply to
; floating-point vectors as well, producing the same vblendvps.
; CHECK: blend2a
; CHECK: vblendvps
; CHECK: ret
define <4 x float> @blend2a(<4 x float> %a, <4 x float> %b) nounwind alwaysinline {
  %t = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %t
}
; Negative test: lane 0 takes element 1 of %a (a cross-lane move), so this
; shuffle is NOT a per-lane select and must not be lowered to vblendvps.
; CHECK: blend3
; CHECK-NOT: vblendvps
; CHECK: ret
define <4 x i32> @blend3(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
  %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 2, i32 7>
  ret <4 x i32> %t
}
; 256-bit case: <4 x i64> with only lane 3 taken from %b should use the
; double-precision variable blend (vblendvpd) on the wide register.
; CHECK: blend4
; CHECK: vblendvpd
; CHECK: ret
define <4 x i64> @blend4(<4 x i64> %a, <4 x i64> %b) nounwind alwaysinline {
  %t = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x i64> %t
}