mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 19:52:54 +01:00
[x86] Teach the new vector shuffle lowering about the simplest of
'insertps' patterns. This replaces two shuffles with a single insertps in very common cases. My next patch will extend this to leverage the zeroing capabilities of insertps which will allow it to be used in a much wider set of cases. llvm-svn: 217100
This commit is contained in:
parent
d5c050ced7
commit
ca60fdbee1
@ -7182,6 +7182,21 @@ static bool isSingleInputShuffleMask(ArrayRef<int> Mask) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/// \brief Check wether all of one set of inputs to a shuffle mask are in place.
|
||||
///
|
||||
/// Mask entries pointing at the other input or undef will be skipped.
|
||||
static bool isShuffleMaskInputInPlace(ArrayRef<int> Mask, bool LoInput = true) {
|
||||
int Size = Mask.size();
|
||||
for (int i = 0; i < Size; ++i) {
|
||||
int M = Mask[i];
|
||||
if (M == -1 || (LoInput && M >= 4) || (!LoInput && M < 4))
|
||||
continue;
|
||||
if (M - (LoInput ? 0 : Size) != i)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Hide this symbol with an anonymous namespace instead of 'static' so that MSVC
|
||||
// 2013 will allow us to use it as a non-type template parameter.
|
||||
namespace {
|
||||
@ -7365,6 +7380,20 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||
int V2Index =
|
||||
std::find_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; }) -
|
||||
Mask.begin();
|
||||
|
||||
// Check whether we can use INSERTPS to perform the blend. We only use
|
||||
// INSERTPS when the V1 elements are already in the correct locations
|
||||
// because otherwise we can just always use two SHUFPS instructions which
|
||||
// are much smaller to encode than a SHUFPS and an INSERTPS.
|
||||
if (Subtarget->hasSSE41() &&
|
||||
isShuffleMaskInputInPlace(Mask, /*LoInput*/ true)) {
|
||||
// Insert the V2 element into the desired position.
|
||||
SDValue InsertPSMask =
|
||||
DAG.getIntPtrConstant(Mask[V2Index] << 6 | V2Index << 4);
|
||||
return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
|
||||
InsertPSMask);
|
||||
}
|
||||
|
||||
// Compute the index adjacent to V2Index and in the same half by toggling
|
||||
// the low bit.
|
||||
int V2AdjIndex = V2Index ^ 1;
|
||||
|
@ -121,10 +121,18 @@ define <4 x float> @shuffle_v4f32_3210(<4 x float> %a, <4 x float> %b) {
|
||||
}
|
||||
|
||||
define <4 x i32> @shuffle_v4i32_0124(<4 x i32> %a, <4 x i32> %b) {
|
||||
; ALL-LABEL: @shuffle_v4i32_0124
|
||||
; ALL: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[2,0]
|
||||
; ALL-NEXT: shufps {{.*}} # xmm0 = xmm0[0,1],xmm1[2,0]
|
||||
; ALL-NEXT: retq
|
||||
; SSE2-LABEL: @shuffle_v4i32_0124
|
||||
; SSE2: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[2,0]
|
||||
; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,1],xmm1[2,0]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: @shuffle_v4i32_0124
|
||||
; SSE41: insertps {{.*}} # xmm0 = xmm0[0,1,2],xmm1[0]
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: @shuffle_v4i32_0124
|
||||
; AVX1: vinsertps {{.*}} # xmm0 = xmm0[0,1,2],xmm1[0]
|
||||
; AVX1-NEXT: retq
|
||||
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
|
||||
ret <4 x i32> %shuffle
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user