mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
[X86][SSE] Call SimplifyMultipleUseDemandedBits on PACKSS/PACKUS arguments.
This mainly helps to replace unused PACKSS/PACKUS arguments with UNDEF in the case where those arguments have multiple users.

llvm-svn: 368026
This commit is contained in:
parent
c840b47a3f
commit
04fd71aa2f
@ -34129,16 +34129,36 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
|
|||||||
}
|
}
|
||||||
case X86ISD::PACKSS:
|
case X86ISD::PACKSS:
|
||||||
case X86ISD::PACKUS: {
|
case X86ISD::PACKUS: {
|
||||||
|
SDValue N0 = Op.getOperand(0);
|
||||||
|
SDValue N1 = Op.getOperand(1);
|
||||||
|
|
||||||
APInt DemandedLHS, DemandedRHS;
|
APInt DemandedLHS, DemandedRHS;
|
||||||
getPackDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS);
|
getPackDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS);
|
||||||
|
|
||||||
APInt SrcUndef, SrcZero;
|
APInt SrcUndef, SrcZero;
|
||||||
if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, SrcUndef,
|
if (SimplifyDemandedVectorElts(N0, DemandedLHS, SrcUndef, SrcZero, TLO,
|
||||||
SrcZero, TLO, Depth + 1))
|
Depth + 1))
|
||||||
return true;
|
return true;
|
||||||
if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, SrcUndef,
|
if (SimplifyDemandedVectorElts(N1, DemandedRHS, SrcUndef, SrcZero, TLO,
|
||||||
SrcZero, TLO, Depth + 1))
|
Depth + 1))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
|
// Aggressively peek through ops to get at the demanded elts.
|
||||||
|
// TODO - we should do this for all target/faux shuffles ops.
|
||||||
|
if (!DemandedElts.isAllOnesValue()) {
|
||||||
|
APInt DemandedSrcBits =
|
||||||
|
APInt::getAllOnesValue(N0.getScalarValueSizeInBits());
|
||||||
|
SDValue NewN0 = SimplifyMultipleUseDemandedBits(
|
||||||
|
N0, DemandedSrcBits, DemandedLHS, TLO.DAG, Depth + 1);
|
||||||
|
SDValue NewN1 = SimplifyMultipleUseDemandedBits(
|
||||||
|
N1, DemandedSrcBits, DemandedRHS, TLO.DAG, Depth + 1);
|
||||||
|
if (NewN0 || NewN1) {
|
||||||
|
NewN0 = NewN0 ? NewN0 : N0;
|
||||||
|
NewN1 = NewN1 ? NewN1 : N1;
|
||||||
|
return TLO.CombineTo(Op,
|
||||||
|
TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewN0, NewN1));
|
||||||
|
}
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case X86ISD::HADD:
|
case X86ISD::HADD:
|
||||||
|
@ -1982,7 +1982,7 @@ define void @trunc_packus_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
|
|||||||
; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm1
|
; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm1
|
||||||
; SSE41-NEXT: packusdw %xmm3, %xmm1
|
; SSE41-NEXT: packusdw %xmm3, %xmm1
|
||||||
; SSE41-NEXT: packusdw %xmm1, %xmm4
|
; SSE41-NEXT: packusdw %xmm1, %xmm4
|
||||||
; SSE41-NEXT: packuswb %xmm4, %xmm4
|
; SSE41-NEXT: packuswb %xmm0, %xmm4
|
||||||
; SSE41-NEXT: movq %xmm4, (%rdi)
|
; SSE41-NEXT: movq %xmm4, (%rdi)
|
||||||
; SSE41-NEXT: retq
|
; SSE41-NEXT: retq
|
||||||
;
|
;
|
||||||
|
@ -1982,7 +1982,7 @@ define void @trunc_packus_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
|
|||||||
; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm1
|
; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm1
|
||||||
; SSE41-NEXT: packusdw %xmm3, %xmm1
|
; SSE41-NEXT: packusdw %xmm3, %xmm1
|
||||||
; SSE41-NEXT: packusdw %xmm1, %xmm4
|
; SSE41-NEXT: packusdw %xmm1, %xmm4
|
||||||
; SSE41-NEXT: packuswb %xmm4, %xmm4
|
; SSE41-NEXT: packuswb %xmm0, %xmm4
|
||||||
; SSE41-NEXT: movq %xmm4, (%rdi)
|
; SSE41-NEXT: movq %xmm4, (%rdi)
|
||||||
; SSE41-NEXT: retq
|
; SSE41-NEXT: retq
|
||||||
;
|
;
|
||||||
|
@ -1927,7 +1927,7 @@ define void @trunc_ssat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
|
|||||||
; SSE41-NEXT: andpd %xmm0, %xmm2
|
; SSE41-NEXT: andpd %xmm0, %xmm2
|
||||||
; SSE41-NEXT: packusdw %xmm7, %xmm2
|
; SSE41-NEXT: packusdw %xmm7, %xmm2
|
||||||
; SSE41-NEXT: packusdw %xmm3, %xmm2
|
; SSE41-NEXT: packusdw %xmm3, %xmm2
|
||||||
; SSE41-NEXT: packuswb %xmm2, %xmm2
|
; SSE41-NEXT: packuswb %xmm0, %xmm2
|
||||||
; SSE41-NEXT: movq %xmm2, (%rdi)
|
; SSE41-NEXT: movq %xmm2, (%rdi)
|
||||||
; SSE41-NEXT: retq
|
; SSE41-NEXT: retq
|
||||||
;
|
;
|
||||||
|
@ -1927,7 +1927,7 @@ define void @trunc_ssat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
|
|||||||
; SSE41-NEXT: andpd %xmm0, %xmm2
|
; SSE41-NEXT: andpd %xmm0, %xmm2
|
||||||
; SSE41-NEXT: packusdw %xmm7, %xmm2
|
; SSE41-NEXT: packusdw %xmm7, %xmm2
|
||||||
; SSE41-NEXT: packusdw %xmm3, %xmm2
|
; SSE41-NEXT: packusdw %xmm3, %xmm2
|
||||||
; SSE41-NEXT: packuswb %xmm2, %xmm2
|
; SSE41-NEXT: packuswb %xmm0, %xmm2
|
||||||
; SSE41-NEXT: movq %xmm2, (%rdi)
|
; SSE41-NEXT: movq %xmm2, (%rdi)
|
||||||
; SSE41-NEXT: retq
|
; SSE41-NEXT: retq
|
||||||
;
|
;
|
||||||
|
@ -1247,7 +1247,7 @@ define void @trunc_usat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
|
|||||||
; SSE2-NEXT: por %xmm2, %xmm0
|
; SSE2-NEXT: por %xmm2, %xmm0
|
||||||
; SSE2-NEXT: packuswb %xmm4, %xmm0
|
; SSE2-NEXT: packuswb %xmm4, %xmm0
|
||||||
; SSE2-NEXT: packuswb %xmm0, %xmm1
|
; SSE2-NEXT: packuswb %xmm0, %xmm1
|
||||||
; SSE2-NEXT: packuswb %xmm1, %xmm1
|
; SSE2-NEXT: packuswb %xmm0, %xmm1
|
||||||
; SSE2-NEXT: movq %xmm1, (%rdi)
|
; SSE2-NEXT: movq %xmm1, (%rdi)
|
||||||
; SSE2-NEXT: retq
|
; SSE2-NEXT: retq
|
||||||
;
|
;
|
||||||
@ -1310,7 +1310,7 @@ define void @trunc_usat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
|
|||||||
; SSSE3-NEXT: por %xmm2, %xmm0
|
; SSSE3-NEXT: por %xmm2, %xmm0
|
||||||
; SSSE3-NEXT: packuswb %xmm4, %xmm0
|
; SSSE3-NEXT: packuswb %xmm4, %xmm0
|
||||||
; SSSE3-NEXT: packuswb %xmm0, %xmm1
|
; SSSE3-NEXT: packuswb %xmm0, %xmm1
|
||||||
; SSSE3-NEXT: packuswb %xmm1, %xmm1
|
; SSSE3-NEXT: packuswb %xmm0, %xmm1
|
||||||
; SSSE3-NEXT: movq %xmm1, (%rdi)
|
; SSSE3-NEXT: movq %xmm1, (%rdi)
|
||||||
; SSSE3-NEXT: retq
|
; SSSE3-NEXT: retq
|
||||||
;
|
;
|
||||||
@ -1364,7 +1364,7 @@ define void @trunc_usat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
|
|||||||
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm9
|
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm9
|
||||||
; SSE41-NEXT: packusdw %xmm5, %xmm9
|
; SSE41-NEXT: packusdw %xmm5, %xmm9
|
||||||
; SSE41-NEXT: packusdw %xmm9, %xmm1
|
; SSE41-NEXT: packusdw %xmm9, %xmm1
|
||||||
; SSE41-NEXT: packuswb %xmm1, %xmm1
|
; SSE41-NEXT: packuswb %xmm0, %xmm1
|
||||||
; SSE41-NEXT: movq %xmm1, (%rdi)
|
; SSE41-NEXT: movq %xmm1, (%rdi)
|
||||||
; SSE41-NEXT: retq
|
; SSE41-NEXT: retq
|
||||||
;
|
;
|
||||||
@ -1998,7 +1998,7 @@ define void @trunc_usat_v8i32_v8i8_store(<8 x i32> %a0, <8 x i8> *%p1) {
|
|||||||
; SSE2-NEXT: pandn %xmm2, %xmm5
|
; SSE2-NEXT: pandn %xmm2, %xmm5
|
||||||
; SSE2-NEXT: por %xmm0, %xmm5
|
; SSE2-NEXT: por %xmm0, %xmm5
|
||||||
; SSE2-NEXT: packuswb %xmm6, %xmm5
|
; SSE2-NEXT: packuswb %xmm6, %xmm5
|
||||||
; SSE2-NEXT: packuswb %xmm5, %xmm5
|
; SSE2-NEXT: packuswb %xmm0, %xmm5
|
||||||
; SSE2-NEXT: movq %xmm5, (%rdi)
|
; SSE2-NEXT: movq %xmm5, (%rdi)
|
||||||
; SSE2-NEXT: retq
|
; SSE2-NEXT: retq
|
||||||
;
|
;
|
||||||
|
@ -1247,7 +1247,7 @@ define void @trunc_usat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
|
|||||||
; SSE2-NEXT: por %xmm2, %xmm0
|
; SSE2-NEXT: por %xmm2, %xmm0
|
||||||
; SSE2-NEXT: packuswb %xmm4, %xmm0
|
; SSE2-NEXT: packuswb %xmm4, %xmm0
|
||||||
; SSE2-NEXT: packuswb %xmm0, %xmm1
|
; SSE2-NEXT: packuswb %xmm0, %xmm1
|
||||||
; SSE2-NEXT: packuswb %xmm1, %xmm1
|
; SSE2-NEXT: packuswb %xmm0, %xmm1
|
||||||
; SSE2-NEXT: movq %xmm1, (%rdi)
|
; SSE2-NEXT: movq %xmm1, (%rdi)
|
||||||
; SSE2-NEXT: retq
|
; SSE2-NEXT: retq
|
||||||
;
|
;
|
||||||
@ -1310,7 +1310,7 @@ define void @trunc_usat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
|
|||||||
; SSSE3-NEXT: por %xmm2, %xmm0
|
; SSSE3-NEXT: por %xmm2, %xmm0
|
||||||
; SSSE3-NEXT: packuswb %xmm4, %xmm0
|
; SSSE3-NEXT: packuswb %xmm4, %xmm0
|
||||||
; SSSE3-NEXT: packuswb %xmm0, %xmm1
|
; SSSE3-NEXT: packuswb %xmm0, %xmm1
|
||||||
; SSSE3-NEXT: packuswb %xmm1, %xmm1
|
; SSSE3-NEXT: packuswb %xmm0, %xmm1
|
||||||
; SSSE3-NEXT: movq %xmm1, (%rdi)
|
; SSSE3-NEXT: movq %xmm1, (%rdi)
|
||||||
; SSSE3-NEXT: retq
|
; SSSE3-NEXT: retq
|
||||||
;
|
;
|
||||||
@ -1364,7 +1364,7 @@ define void @trunc_usat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
|
|||||||
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm9
|
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm9
|
||||||
; SSE41-NEXT: packusdw %xmm5, %xmm9
|
; SSE41-NEXT: packusdw %xmm5, %xmm9
|
||||||
; SSE41-NEXT: packusdw %xmm9, %xmm1
|
; SSE41-NEXT: packusdw %xmm9, %xmm1
|
||||||
; SSE41-NEXT: packuswb %xmm1, %xmm1
|
; SSE41-NEXT: packuswb %xmm0, %xmm1
|
||||||
; SSE41-NEXT: movq %xmm1, (%rdi)
|
; SSE41-NEXT: movq %xmm1, (%rdi)
|
||||||
; SSE41-NEXT: retq
|
; SSE41-NEXT: retq
|
||||||
;
|
;
|
||||||
@ -1998,7 +1998,7 @@ define void @trunc_usat_v8i32_v8i8_store(<8 x i32> %a0, <8 x i8> *%p1) {
|
|||||||
; SSE2-NEXT: pandn %xmm2, %xmm5
|
; SSE2-NEXT: pandn %xmm2, %xmm5
|
||||||
; SSE2-NEXT: por %xmm0, %xmm5
|
; SSE2-NEXT: por %xmm0, %xmm5
|
||||||
; SSE2-NEXT: packuswb %xmm6, %xmm5
|
; SSE2-NEXT: packuswb %xmm6, %xmm5
|
||||||
; SSE2-NEXT: packuswb %xmm5, %xmm5
|
; SSE2-NEXT: packuswb %xmm0, %xmm5
|
||||||
; SSE2-NEXT: movq %xmm5, (%rdi)
|
; SSE2-NEXT: movq %xmm5, (%rdi)
|
||||||
; SSE2-NEXT: retq
|
; SSE2-NEXT: retq
|
||||||
;
|
;
|
||||||
|
Loading…
Reference in New Issue
Block a user