mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 19:52:54 +01:00
AVX2 optimization.
Added generation of the VPSHUFB instruction for <32 x i8> vector shuffles when possible. llvm-svn: 163312
This commit is contained in:
parent
196b00bd57
commit
9339eef307
@ -6011,6 +6011,40 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
|
||||
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, NewV);
|
||||
}
|
||||
|
||||
// v32i8 shuffles - Translate to VPSHUFB if possible.
//
// Attempts to lower a shuffle to a single X86ISD::PSHUFB node whose control
// operand is a BUILD_VECTOR of 32 i8 constants derived from the shuffle mask.
//
// Returns an empty SDValue (meaning "not handled here") when:
//   - the result type is not v32i8,
//   - the subtarget does not report AVX2,
//   - the second shuffle operand is not undef, or
//   - some mask element would move a byte between the low half (0-15) and
//     the high half (16-31) of the vector.
|
||||
static
|
||||
SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp,
|
||||
SelectionDAG &DAG,
|
||||
const X86TargetLowering &TLI) {
|
||||
EVT VT = SVOp->getValueType(0);
|
||||
SDValue V1 = SVOp->getOperand(0);
|
||||
SDValue V2 = SVOp->getOperand(1);
|
||||
DebugLoc dl = SVOp->getDebugLoc();
|
||||
ArrayRef<int> MaskVals = SVOp->getMask();
|
||||
|
||||
bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
|
||||
|
||||
// Only the single-input case is handled: V1 is the sole data source below.
if (VT != MVT::v32i8 || !TLI.getSubtarget()->hasAVX2() || !V2IsUndef)
|
||||
return SDValue();
|
||||
|
||||
// One i8 control constant per result byte.
SmallVector<SDValue,32> pshufbMask;
|
||||
for (unsigned i = 0; i != 32; i++) {
|
||||
int EltIdx = MaskVals[i];
|
||||
// Negative (undef) entries and entries >= 32 (which would select from the
// undef second operand) get the control byte 0x80.
// NOTE(review): 0x80 sets bit 7 of the control byte, which per the PSHUFB
// definition should produce a zero byte - confirm against the ISA reference.
if (EltIdx < 0 || EltIdx >= 32)
|
||||
EltIdx = 0x80;
|
||||
else {
|
||||
if ((EltIdx >= 16 && i < 16) || (EltIdx < 16 && i >= 16))
|
||||
// Cross lane is not allowed: source byte and destination byte must be
// in the same 16-byte half, otherwise give up on this lowering.
|
||||
return SDValue();
|
||||
// Keep only the byte position within its 16-byte half.
EltIdx &= 0xf;
|
||||
}
|
||||
pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
|
||||
}
|
||||
// Emit PSHUFB with V1 as data and the constant vector as the byte selector.
return DAG.getNode(X86ISD::PSHUFB, dl, MVT::v32i8, V1,
|
||||
DAG.getNode(ISD::BUILD_VECTOR, dl,
|
||||
MVT::v32i8, &pshufbMask[0], 32));
|
||||
}
|
||||
|
||||
/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
|
||||
/// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be
|
||||
/// done when every pair / quad of shuffle mask elements point to elements in
|
||||
@ -6837,6 +6871,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
|
||||
return NewOp;
|
||||
}
|
||||
|
||||
if (VT == MVT::v32i8) {
|
||||
SDValue NewOp = LowerVECTOR_SHUFFLEv32i8(SVOp, DAG, *this);
|
||||
if (NewOp.getNode())
|
||||
return NewOp;
|
||||
}
|
||||
|
||||
// Handle all 128-bit wide vectors with 4 elements, and match them with
|
||||
// several different shuffle types.
|
||||
if (NumElems == 4 && VT.is128BitVector())
|
||||
|
@ -26,3 +26,14 @@ entry:
|
||||
%shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 3, i32 undef, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15>
|
||||
ret <16 x i16> %shuffle.i
|
||||
}
|
||||
|
||||
; CHECK: vpshufb_test
|
||||
; CHECK: vpshufb {{.*\(%r.*}}, %ymm
|
||||
; CHECK: ret
|
||||
; Single-input <32 x i8> shuffle (second operand undef). Result bytes 0-15
; select only from source bytes 0-15 and result bytes 16-31 select only from
; source bytes 16-31 (plus one undef element), so no byte crosses between the
; two 16-byte halves.
define <32 x i8> @vpshufb_test(<32 x i8> %a) nounwind {
|
||||
%S = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15,
|
||||
i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15,
|
||||
i32 18, i32 19, i32 30, i32 16, i32 25, i32 23, i32 17, i32 25,
|
||||
i32 20, i32 19, i32 31, i32 17, i32 23, i32 undef, i32 29, i32 18>
|
||||
ret <32 x i8>%S
|
||||
}
|
Loading…
Reference in New Issue
Block a user