mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
Shuffle optimization for AVX/AVX2.
This patch optimizes frequently used shuffle patterns, reducing the generated instruction sequence as shown below.

Before:
  vshufps $-35, %xmm1, %xmm0, %xmm2 ## xmm2 = xmm0[1,3],xmm1[1,3]
  vpermilps $-40, %xmm2, %xmm2 ## xmm2 = xmm2[0,2,1,3]
  vextractf128 $1, %ymm1, %xmm1
  vextractf128 $1, %ymm0, %xmm0
  vshufps $-35, %xmm1, %xmm0, %xmm0 ## xmm0 = xmm0[1,3],xmm1[1,3]
  vpermilps $-40, %xmm0, %xmm0 ## xmm0 = xmm0[0,2,1,3]
  vinsertf128 $1, %xmm0, %ymm2, %ymm0

After:
  vshufps $13, %ymm0, %ymm1, %ymm1 ## ymm1 = ymm1[1,3],ymm0[0,0],ymm1[5,7],ymm0[4,4]
  vshufps $13, %ymm0, %ymm0, %ymm0 ## ymm0 = ymm0[1,3,0,0,5,7,4,4]
  vunpcklps %ymm1, %ymm0, %ymm0 ## ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]

llvm-svn: 159188
This commit is contained in:
parent
872747b0f0
commit
832f074a32
@ -3496,6 +3496,53 @@ static bool isMOVLHPSMask(ArrayRef<int> Mask, EVT VT) {
|
||||
return true;
|
||||
}
|
||||
|
||||
//
|
||||
// Some special combinations that can be optimized.
|
||||
//
|
||||
static
|
||||
SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp,
|
||||
SelectionDAG &DAG) {
|
||||
EVT VT = SVOp->getValueType(0);
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
DebugLoc dl = SVOp->getDebugLoc();
|
||||
|
||||
if (VT != MVT::v8i32 && VT != MVT::v8f32)
|
||||
return SDValue();
|
||||
|
||||
ArrayRef<int> Mask = SVOp->getMask();
|
||||
|
||||
// These are the special masks that may be optimized.
|
||||
static const int MaskToOptimizeEven[] = {0, 8, 2, 10, 4, 12, 6, 14};
|
||||
static const int MaskToOptimizeOdd[] = {1, 9, 3, 11, 5, 13, 7, 15};
|
||||
bool MatchEvenMask = true;
|
||||
bool MatchOddMask = true;
|
||||
for (int i=0; i<8; ++i) {
|
||||
if (!isUndefOrEqual(Mask[i], MaskToOptimizeEven[i]))
|
||||
MatchEvenMask = false;
|
||||
if (!isUndefOrEqual(Mask[i], MaskToOptimizeOdd[i]))
|
||||
MatchOddMask = false;
|
||||
}
|
||||
static const int CompactionMaskEven[] = {0, 2, -1, -1, 4, 6, -1, -1};
|
||||
static const int CompactionMaskOdd [] = {1, 3, -1, -1, 5, 7, -1, -1};
|
||||
|
||||
const int *CompactionMask;
|
||||
if (MatchEvenMask)
|
||||
CompactionMask = CompactionMaskEven;
|
||||
else if (MatchOddMask)
|
||||
CompactionMask = CompactionMaskOdd;
|
||||
else
|
||||
return SDValue();
|
||||
|
||||
SDValue UndefNode = DAG.getNode(ISD::UNDEF, dl, VT);
|
||||
|
||||
SDValue Op0 = DAG.getVectorShuffle(VT, dl, SVOp->getOperand(0),
|
||||
UndefNode, CompactionMask);
|
||||
SDValue Op1 = DAG.getVectorShuffle(VT, dl, SVOp->getOperand(1),
|
||||
UndefNode, CompactionMask);
|
||||
static const int UnpackMask[] = {0, 8, 1, 9, 4, 12, 5, 13};
|
||||
return DAG.getVectorShuffle(VT, dl, Op0, Op1, UnpackMask);
|
||||
}
|
||||
|
||||
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
|
||||
static bool isUNPCKLMask(ArrayRef<int> Mask, EVT VT,
|
||||
@ -5982,6 +6029,11 @@ static SDValue getVZextMovL(EVT VT, EVT OpVT,
|
||||
/// which could not be matched by any known target specific shuffle
|
||||
static SDValue
|
||||
LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
|
||||
|
||||
SDValue NewOp = Compact8x32ShuffleNode(SVOp, DAG);
|
||||
if (NewOp.getNode())
|
||||
return NewOp;
|
||||
|
||||
EVT VT = SVOp->getValueType(0);
|
||||
|
||||
unsigned NumElems = VT.getVectorNumElements();
|
||||
|
@ -227,3 +227,24 @@ define <8 x float> @test17(<4 x float> %y) {
|
||||
%x = shufflevector <4 x float> %y, <4 x float> undef, <8 x i32> <i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
ret <8 x float> %x
|
||||
}
|
||||
|
||||
; Interleave the odd-indexed elements of %A and %B, i.e.
; <A1, B1, A3, B3, A5, B5, A7, B7>. The expected lowering is two vshufps
; instructions followed by a vunpcklps.
; CHECK: test18
; CHECK: vshufps
; CHECK: vshufps
; CHECK: vunpcklps
; CHECK: ret
define <8 x float> @test18(<8 x float> %A, <8 x float> %B) nounwind {
  %odd = shufflevector <8 x float> %A, <8 x float> %B, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <8 x float> %odd
}
|
||||
|
||||
; Interleave the even-indexed elements of %A and %B, i.e.
; <A0, B0, A2, B2, A4, B4, A6, B6>. The expected lowering is two vshufps
; instructions followed by a vunpcklps.
; CHECK: test19
; CHECK: vshufps
; CHECK: vshufps
; CHECK: vunpcklps
; CHECK: ret
define <8 x float> @test19(<8 x float> %A, <8 x float> %B) nounwind {
  %even = shufflevector <8 x float> %A, <8 x float> %B, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  ret <8 x float> %even
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user