1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 11:42:57 +01:00

[X86] SimplifyDemandedVectorEltsForTargetNode - remove identity target shuffles before simplifying inputs

By removing demanded target shuffles that simplify to zero/undef/identity before simplifying their inputs, we improve the chances of further simplification, as only the immediate parent user of the combined node is added back to the work list - this still doesn't help us if it's passed through other ops though (bitcasts, etc.).

llvm-svn: 343390
This commit is contained in:
Simon Pilgrim 2018-09-29 18:15:26 +00:00
parent eb9cee2f29
commit 1a4f4d3127
3 changed files with 21 additions and 23 deletions

View File

@ -31732,25 +31732,8 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
[VT](SDValue V) { return VT != V.getValueType(); })) [VT](SDValue V) { return VT != V.getValueType(); }))
return false; return false;
// Attempt to simplify inputs.
int NumSrcs = OpInputs.size();
for (int Src = 0; Src != NumSrcs; ++Src) {
int Lo = Src * NumElts;
APInt SrcElts = APInt::getNullValue(NumElts);
for (int i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
int M = OpMask[i] - Lo;
if (0 <= M && M < NumElts)
SrcElts.setBit(M);
}
APInt SrcUndef, SrcZero;
if (SimplifyDemandedVectorElts(OpInputs[Src], SrcElts, SrcUndef, SrcZero,
TLO, Depth + 1))
return true;
}
// Check if shuffle mask can be simplified to undef/zero/identity. // Check if shuffle mask can be simplified to undef/zero/identity.
int NumSrcs = OpInputs.size();
for (int i = 0; i != NumElts; ++i) for (int i = 0; i != NumElts; ++i)
if (!DemandedElts[i]) if (!DemandedElts[i])
OpMask[i] = SM_SentinelUndef; OpMask[i] = SM_SentinelUndef;
@ -31768,6 +31751,23 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
if (isSequentialOrUndefInRange(OpMask, 0, NumElts, Src * NumElts)) if (isSequentialOrUndefInRange(OpMask, 0, NumElts, Src * NumElts))
return TLO.CombineTo(Op, OpInputs[Src]); return TLO.CombineTo(Op, OpInputs[Src]);
// Attempt to simplify inputs.
for (int Src = 0; Src != NumSrcs; ++Src) {
int Lo = Src * NumElts;
APInt SrcElts = APInt::getNullValue(NumElts);
for (int i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
int M = OpMask[i] - Lo;
if (0 <= M && M < NumElts)
SrcElts.setBit(M);
}
APInt SrcUndef, SrcZero;
if (SimplifyDemandedVectorElts(OpInputs[Src], SrcElts, SrcUndef, SrcZero,
TLO, Depth + 1))
return true;
}
// Extract known zero/undef elements. // Extract known zero/undef elements.
// TODO - Propagate input undef/zero elts. // TODO - Propagate input undef/zero elts.
for (int i = 0; i != NumElts; ++i) { for (int i = 0; i != NumElts; ++i) {

View File

@ -9,9 +9,7 @@ define i32 @t(i8* %ref_frame_ptr, i32 %ref_frame_stride, i32 %idxX, i32 %idxY) n
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: imull {{[0-9]+}}(%esp), %ecx ; X32-NEXT: imull {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X32-NEXT: movl (%eax,%ecx), %eax
; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,2,4,5,6,7]
; X32-NEXT: movd %xmm0, %eax
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: t: ; X64-LABEL: t:

View File

@ -1280,7 +1280,7 @@ define void @interleave_24i32_out(<24 x i32>* %p, <8 x i32>* %q1, <8 x i32>* %q2
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm11[1,1,2,3] ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm11[1,1,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1] ; SSE2-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1]
; SSE2-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,1],xmm5[0,3] ; SSE2-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,1],xmm5[0,3]
; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,0],xmm11[2,3] ; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,0],xmm11[2,0]
; SSE2-NEXT: movdqa %xmm0, %xmm3 ; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,3],xmm5[2,0] ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,3],xmm5[2,0]
; SSE2-NEXT: movaps %xmm8, %xmm5 ; SSE2-NEXT: movaps %xmm8, %xmm5
@ -1288,7 +1288,7 @@ define void @interleave_24i32_out(<24 x i32>* %p, <8 x i32>* %q1, <8 x i32>* %q2
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm9[2,3,0,1] ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm9[2,3,0,1]
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1] ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm8[0,3] ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm8[0,3]
; SSE2-NEXT: shufps {{.*#+}} xmm8 = xmm8[1,0],xmm10[2,3] ; SSE2-NEXT: shufps {{.*#+}} xmm8 = xmm8[1,0],xmm10[2,0]
; SSE2-NEXT: movdqa %xmm9, %xmm2 ; SSE2-NEXT: movdqa %xmm9, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm9 = xmm9[0,3],xmm8[2,0] ; SSE2-NEXT: shufps {{.*#+}} xmm9 = xmm9[0,3],xmm8[2,0]
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,0],xmm10[0,0] ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,0],xmm10[0,0]