mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[X86][SSE] Improve PSHUFB lowering from either input
Canonicalization may leave the zeroable vector in the first input, so the single PSHUFB source is now allowed to be either V1 or V2 (but never both).
llvm-svn: 287461
This commit is contained in:
parent
dfba5e88ee
commit
04cf2ff75c
@ -7582,8 +7582,7 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
|
||||
return Zeroable;
|
||||
}
|
||||
|
||||
/// Try to lower a shuffle with a single PSHUFB of V1.
|
||||
/// This is only possible if V2 is unused (at all, or only for zero elements).
|
||||
/// Try to lower a shuffle with a single PSHUFB of V1 or V2.
|
||||
static SDValue lowerVectorShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
|
||||
ArrayRef<int> Mask, SDValue V1,
|
||||
SDValue V2,
|
||||
@ -7603,6 +7602,7 @@ static SDValue lowerVectorShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
|
||||
// Sign bit set in i8 mask means zero element.
|
||||
SDValue ZeroMask = DAG.getConstant(0x80, DL, MVT::i8);
|
||||
|
||||
SDValue V;
|
||||
for (int i = 0; i < NumBytes; ++i) {
|
||||
int M = Mask[i / NumEltBytes];
|
||||
if (M < 0) {
|
||||
@ -7613,9 +7613,13 @@ static SDValue lowerVectorShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
|
||||
PSHUFBMask[i] = ZeroMask;
|
||||
continue;
|
||||
}
|
||||
// Only allow V1.
|
||||
if (M >= Size)
|
||||
|
||||
// We can only use a single input of V1 or V2.
|
||||
SDValue SrcV = (M >= Size ? V2 : V1);
|
||||
if (V && V != SrcV)
|
||||
return SDValue();
|
||||
V = SrcV;
|
||||
M %= Size;
|
||||
|
||||
// PSHUFB can't cross lanes, ensure this doesn't happen.
|
||||
if ((M / LaneSize) != ((i / NumEltBytes) / LaneSize))
|
||||
@ -7625,10 +7629,11 @@ static SDValue lowerVectorShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
|
||||
M = M * NumEltBytes + (i % NumEltBytes);
|
||||
PSHUFBMask[i] = DAG.getConstant(M, DL, MVT::i8);
|
||||
}
|
||||
assert(V && "Failed to find a source input");
|
||||
|
||||
MVT I8VT = MVT::getVectorVT(MVT::i8, NumBytes);
|
||||
return DAG.getBitcast(
|
||||
VT, DAG.getNode(X86ISD::PSHUFB, DL, I8VT, DAG.getBitcast(I8VT, V1),
|
||||
VT, DAG.getNode(X86ISD::PSHUFB, DL, I8VT, DAG.getBitcast(I8VT, V),
|
||||
DAG.getBuildVector(I8VT, DL, PSHUFBMask)));
|
||||
}
|
||||
|
||||
|
@ -120,11 +120,7 @@ define <64 x i8> @shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz(<64 x i8> %a) {
|
||||
;
|
||||
; AVX512BW-LABEL: shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: movl $255, %eax
|
||||
; AVX512BW-NEXT: vmovd %eax, %xmm1
|
||||
; AVX512BW-NEXT: vpand %ymm1, %ymm0, %ymm0
|
||||
; AVX512BW-NEXT: vpxor %ymm1, %ymm1, %ymm1
|
||||
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz:
|
||||
@ -137,10 +133,7 @@ define <64 x i8> @shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz(<64 x i8> %a) {
|
||||
;
|
||||
; AVX512VBMI-LABEL: shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz:
|
||||
; AVX512VBMI: # BB#0:
|
||||
; AVX512VBMI-NEXT: vmovdqu8 {{.*#+}} zmm2 = [64,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63]
|
||||
; AVX512VBMI-NEXT: vpxord %zmm1, %zmm1, %zmm1
|
||||
; AVX512VBMI-NEXT: vpermt2b %zmm0, %zmm2, %zmm1
|
||||
; AVX512VBMI-NEXT: vmovdqa64 %zmm1, %zmm0
|
||||
; AVX512VBMI-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VBMI-NEXT: retq
|
||||
%shuffle = shufflevector <64 x i8> %a, <64 x i8> zeroinitializer, <64 x i32> <i32 0, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64>
|
||||
ret <64 x i8> %shuffle
|
||||
|
Loading…
Reference in New Issue
Block a user