mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
[X86][SSE] getFauxShuffle - ignore undemanded sources for PACKSS/PACKUS faux shuffles
If we don't care about an entire LHS/RHS of the PACK op, then we can just treat it the same as undef (we don't care if it saturates), and it is safe to treat as a shuffle. This can happen if we attempt to decode as a faux shuffle before SimplifyDemandedVectorElts has been called on the PACK, which should have replaced the source with UNDEF entirely.
This commit is contained in:
parent
8b5de07c6a
commit
5a6feccd46
@ -7595,19 +7595,19 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
|
||||
APInt EltsLHS, EltsRHS;
|
||||
getPackDemandedElts(VT, DemandedElts, EltsLHS, EltsRHS);
|
||||
|
||||
// If we know input saturation won't happen we can treat this
|
||||
// as a truncation shuffle.
|
||||
// If we know input saturation won't happen (or we don't care for particular
|
||||
// lanes), we can treat this as a truncation shuffle.
|
||||
if (Opcode == X86ISD::PACKSS) {
|
||||
if ((!N0.isUndef() &&
|
||||
if ((!(N0.isUndef() || EltsLHS.isNullValue()) &&
|
||||
DAG.ComputeNumSignBits(N0, EltsLHS, Depth + 1) <= NumBitsPerElt) ||
|
||||
(!N1.isUndef() &&
|
||||
(!(N1.isUndef() || EltsRHS.isNullValue()) &&
|
||||
DAG.ComputeNumSignBits(N1, EltsRHS, Depth + 1) <= NumBitsPerElt))
|
||||
return false;
|
||||
} else {
|
||||
APInt ZeroMask = APInt::getHighBitsSet(2 * NumBitsPerElt, NumBitsPerElt);
|
||||
if ((!N0.isUndef() &&
|
||||
if ((!(N0.isUndef() || EltsLHS.isNullValue()) &&
|
||||
!DAG.MaskedValueIsZero(N0, ZeroMask, EltsLHS, Depth + 1)) ||
|
||||
(!N1.isUndef() &&
|
||||
(!(N1.isUndef() || EltsRHS.isNullValue()) &&
|
||||
!DAG.MaskedValueIsZero(N1, ZeroMask, EltsRHS, Depth + 1)))
|
||||
return false;
|
||||
}
|
||||
|
@ -1697,9 +1697,8 @@ define i8 @test_v16i8(<16 x i8> %a0) {
|
||||
; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE2-NEXT: pmullw %xmm2, %xmm0
|
||||
; SSE2-NEXT: pand %xmm1, %xmm0
|
||||
; SSE2-NEXT: packuswb %xmm3, %xmm0
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE2-NEXT: pand %xmm0, %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSE2-NEXT: psrlw $8, %xmm1
|
||||
; SSE2-NEXT: pmullw %xmm0, %xmm1
|
||||
; SSE2-NEXT: movd %xmm1, %eax
|
||||
@ -1718,17 +1717,16 @@ define i8 @test_v16i8(<16 x i8> %a0) {
|
||||
; SSE41-NEXT: packuswb %xmm3, %xmm0
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SSE41-NEXT: pmullw %xmm2, %xmm0
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
|
||||
; SSE41-NEXT: pand %xmm1, %xmm0
|
||||
; SSE41-NEXT: packuswb %xmm3, %xmm0
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SSE41-NEXT: pmullw %xmm2, %xmm0
|
||||
; SSE41-NEXT: pand %xmm1, %xmm0
|
||||
; SSE41-NEXT: packuswb %xmm3, %xmm0
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE41-NEXT: psrlw $8, %xmm1
|
||||
; SSE41-NEXT: pand %xmm0, %xmm1
|
||||
; SSE41-NEXT: packuswb %xmm3, %xmm1
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; SSE41-NEXT: pmullw %xmm0, %xmm1
|
||||
; SSE41-NEXT: movd %xmm1, %eax
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: psrld $8, %xmm0
|
||||
; SSE41-NEXT: psrlw $8, %xmm0
|
||||
; SSE41-NEXT: pmullw %xmm1, %xmm0
|
||||
; SSE41-NEXT: movd %xmm0, %eax
|
||||
; SSE41-NEXT: # kill: def $al killed $al killed $eax
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -1741,10 +1739,8 @@ define i8 @test_v16i8(<16 x i8> %a0) {
|
||||
; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX1-NEXT: vpsrld $8, %xmm0, %xmm1
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovd %xmm0, %eax
|
||||
; AVX1-NEXT: # kill: def $al killed $al killed $eax
|
||||
@ -1889,9 +1885,8 @@ define i8 @test_v32i8(<32 x i8> %a0) {
|
||||
; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE2-NEXT: pmullw %xmm2, %xmm0
|
||||
; SSE2-NEXT: pand %xmm1, %xmm0
|
||||
; SSE2-NEXT: packuswb %xmm3, %xmm0
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE2-NEXT: pand %xmm0, %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSE2-NEXT: psrlw $8, %xmm1
|
||||
; SSE2-NEXT: pmullw %xmm0, %xmm1
|
||||
; SSE2-NEXT: movd %xmm1, %eax
|
||||
@ -1919,16 +1914,15 @@ define i8 @test_v32i8(<32 x i8> %a0) {
|
||||
; SSE41-NEXT: packuswb %xmm3, %xmm2
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; SSE41-NEXT: pmullw %xmm0, %xmm2
|
||||
; SSE41-NEXT: pand %xmm2, %xmm1
|
||||
; SSE41-NEXT: packuswb %xmm3, %xmm1
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
|
||||
; SSE41-NEXT: pand %xmm1, %xmm2
|
||||
; SSE41-NEXT: packuswb %xmm3, %xmm2
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; SSE41-NEXT: pmullw %xmm0, %xmm2
|
||||
; SSE41-NEXT: pand %xmm1, %xmm2
|
||||
; SSE41-NEXT: packuswb %xmm3, %xmm2
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; SSE41-NEXT: pmullw %xmm0, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: psrld $8, %xmm0
|
||||
; SSE41-NEXT: psrlw $8, %xmm0
|
||||
; SSE41-NEXT: pmullw %xmm2, %xmm0
|
||||
; SSE41-NEXT: pmullw %xmm1, %xmm0
|
||||
; SSE41-NEXT: movd %xmm0, %eax
|
||||
; SSE41-NEXT: # kill: def $al killed $al killed $eax
|
||||
; SSE41-NEXT: retq
|
||||
@ -1947,10 +1941,8 @@ define i8 @test_v32i8(<32 x i8> %a0) {
|
||||
; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX1-NEXT: vpsrld $8, %xmm0, %xmm1
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovd %xmm0, %eax
|
||||
; AVX1-NEXT: # kill: def $al killed $al killed $eax
|
||||
@ -2130,9 +2122,8 @@ define i8 @test_v64i8(<64 x i8> %a0) {
|
||||
; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE2-NEXT: pmullw %xmm2, %xmm0
|
||||
; SSE2-NEXT: pand %xmm1, %xmm0
|
||||
; SSE2-NEXT: packuswb %xmm3, %xmm0
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE2-NEXT: pand %xmm0, %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSE2-NEXT: psrlw $8, %xmm1
|
||||
; SSE2-NEXT: pmullw %xmm0, %xmm1
|
||||
; SSE2-NEXT: movd %xmm1, %eax
|
||||
@ -2173,14 +2164,13 @@ define i8 @test_v64i8(<64 x i8> %a0) {
|
||||
; SSE41-NEXT: packuswb %xmm3, %xmm1
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; SSE41-NEXT: pmullw %xmm0, %xmm1
|
||||
; SSE41-NEXT: pand %xmm1, %xmm2
|
||||
; SSE41-NEXT: packuswb %xmm3, %xmm2
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||
; SSE41-NEXT: pand %xmm2, %xmm1
|
||||
; SSE41-NEXT: packuswb %xmm3, %xmm1
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; SSE41-NEXT: pmullw %xmm0, %xmm1
|
||||
; SSE41-NEXT: pand %xmm2, %xmm1
|
||||
; SSE41-NEXT: packuswb %xmm3, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: psrld $8, %xmm0
|
||||
; SSE41-NEXT: psrlw $8, %xmm0
|
||||
; SSE41-NEXT: pmullw %xmm1, %xmm0
|
||||
; SSE41-NEXT: movd %xmm0, %eax
|
||||
@ -2210,10 +2200,8 @@ define i8 @test_v64i8(<64 x i8> %a0) {
|
||||
; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX1-NEXT: vpsrld $8, %xmm0, %xmm1
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovd %xmm0, %eax
|
||||
; AVX1-NEXT: # kill: def $al killed $al killed $eax
|
||||
@ -2447,9 +2435,8 @@ define i8 @test_v128i8(<128 x i8> %a0) {
|
||||
; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE2-NEXT: pmullw %xmm2, %xmm1
|
||||
; SSE2-NEXT: pand %xmm0, %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm3, %xmm1
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE2-NEXT: pand %xmm1, %xmm0
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm0
|
||||
; SSE2-NEXT: psrlw $8, %xmm0
|
||||
; SSE2-NEXT: pmullw %xmm1, %xmm0
|
||||
; SSE2-NEXT: movd %xmm0, %eax
|
||||
@ -2514,17 +2501,16 @@ define i8 @test_v128i8(<128 x i8> %a0) {
|
||||
; SSE41-NEXT: packuswb %xmm2, %xmm0
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SSE41-NEXT: pmullw %xmm1, %xmm0
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSE41-NEXT: pand %xmm4, %xmm0
|
||||
; SSE41-NEXT: packuswb %xmm2, %xmm0
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SSE41-NEXT: pmullw %xmm1, %xmm0
|
||||
; SSE41-NEXT: pand %xmm4, %xmm0
|
||||
; SSE41-NEXT: packuswb %xmm2, %xmm0
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE41-NEXT: psrlw $8, %xmm1
|
||||
; SSE41-NEXT: pand %xmm0, %xmm4
|
||||
; SSE41-NEXT: packuswb %xmm2, %xmm4
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
|
||||
; SSE41-NEXT: pmullw %xmm0, %xmm1
|
||||
; SSE41-NEXT: movd %xmm1, %eax
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: psrld $8, %xmm0
|
||||
; SSE41-NEXT: psrlw $8, %xmm0
|
||||
; SSE41-NEXT: pmullw %xmm1, %xmm0
|
||||
; SSE41-NEXT: movd %xmm0, %eax
|
||||
; SSE41-NEXT: # kill: def $al killed $al killed $eax
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -2569,10 +2555,8 @@ define i8 @test_v128i8(<128 x i8> %a0) {
|
||||
; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX1-NEXT: vpsrld $8, %xmm0, %xmm1
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovd %xmm0, %eax
|
||||
; AVX1-NEXT: # kill: def $al killed $al killed $eax
|
||||
|
Loading…
x
Reference in New Issue
Block a user