mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
[X86][SSE] Call SimplifyMultipleUseDemandedBits on PACKSS/PACKUS arguments.
This mainly helps to replace unused arguments with UNDEF in the case where they have multiple users.
llvm-svn: 368026
This commit is contained in:
parent
c840b47a3f
commit
04fd71aa2f
@ -34129,16 +34129,36 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
|
||||
}
|
||||
case X86ISD::PACKSS:
|
||||
case X86ISD::PACKUS: {
|
||||
SDValue N0 = Op.getOperand(0);
|
||||
SDValue N1 = Op.getOperand(1);
|
||||
|
||||
APInt DemandedLHS, DemandedRHS;
|
||||
getPackDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS);
|
||||
|
||||
APInt SrcUndef, SrcZero;
|
||||
if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, SrcUndef,
|
||||
SrcZero, TLO, Depth + 1))
|
||||
if (SimplifyDemandedVectorElts(N0, DemandedLHS, SrcUndef, SrcZero, TLO,
|
||||
Depth + 1))
|
||||
return true;
|
||||
if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, SrcUndef,
|
||||
SrcZero, TLO, Depth + 1))
|
||||
if (SimplifyDemandedVectorElts(N1, DemandedRHS, SrcUndef, SrcZero, TLO,
|
||||
Depth + 1))
|
||||
return true;
|
||||
|
||||
// Aggressively peek through ops to get at the demanded elts.
|
||||
// TODO - we should do this for all target/faux shuffles ops.
|
||||
if (!DemandedElts.isAllOnesValue()) {
|
||||
APInt DemandedSrcBits =
|
||||
APInt::getAllOnesValue(N0.getScalarValueSizeInBits());
|
||||
SDValue NewN0 = SimplifyMultipleUseDemandedBits(
|
||||
N0, DemandedSrcBits, DemandedLHS, TLO.DAG, Depth + 1);
|
||||
SDValue NewN1 = SimplifyMultipleUseDemandedBits(
|
||||
N1, DemandedSrcBits, DemandedRHS, TLO.DAG, Depth + 1);
|
||||
if (NewN0 || NewN1) {
|
||||
NewN0 = NewN0 ? NewN0 : N0;
|
||||
NewN1 = NewN1 ? NewN1 : N1;
|
||||
return TLO.CombineTo(Op,
|
||||
TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewN0, NewN1));
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case X86ISD::HADD:
|
||||
|
@ -1982,7 +1982,7 @@ define void @trunc_packus_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm1
|
||||
; SSE41-NEXT: packusdw %xmm3, %xmm1
|
||||
; SSE41-NEXT: packusdw %xmm1, %xmm4
|
||||
; SSE41-NEXT: packuswb %xmm4, %xmm4
|
||||
; SSE41-NEXT: packuswb %xmm0, %xmm4
|
||||
; SSE41-NEXT: movq %xmm4, (%rdi)
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
|
@ -1982,7 +1982,7 @@ define void @trunc_packus_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm1
|
||||
; SSE41-NEXT: packusdw %xmm3, %xmm1
|
||||
; SSE41-NEXT: packusdw %xmm1, %xmm4
|
||||
; SSE41-NEXT: packuswb %xmm4, %xmm4
|
||||
; SSE41-NEXT: packuswb %xmm0, %xmm4
|
||||
; SSE41-NEXT: movq %xmm4, (%rdi)
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
|
@ -1927,7 +1927,7 @@ define void @trunc_ssat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
|
||||
; SSE41-NEXT: andpd %xmm0, %xmm2
|
||||
; SSE41-NEXT: packusdw %xmm7, %xmm2
|
||||
; SSE41-NEXT: packusdw %xmm3, %xmm2
|
||||
; SSE41-NEXT: packuswb %xmm2, %xmm2
|
||||
; SSE41-NEXT: packuswb %xmm0, %xmm2
|
||||
; SSE41-NEXT: movq %xmm2, (%rdi)
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
|
@ -1927,7 +1927,7 @@ define void @trunc_ssat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
|
||||
; SSE41-NEXT: andpd %xmm0, %xmm2
|
||||
; SSE41-NEXT: packusdw %xmm7, %xmm2
|
||||
; SSE41-NEXT: packusdw %xmm3, %xmm2
|
||||
; SSE41-NEXT: packuswb %xmm2, %xmm2
|
||||
; SSE41-NEXT: packuswb %xmm0, %xmm2
|
||||
; SSE41-NEXT: movq %xmm2, (%rdi)
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
|
@ -1247,7 +1247,7 @@ define void @trunc_usat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
|
||||
; SSE2-NEXT: por %xmm2, %xmm0
|
||||
; SSE2-NEXT: packuswb %xmm4, %xmm0
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSE2-NEXT: movq %xmm1, (%rdi)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -1310,7 +1310,7 @@ define void @trunc_usat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
|
||||
; SSSE3-NEXT: por %xmm2, %xmm0
|
||||
; SSSE3-NEXT: packuswb %xmm4, %xmm0
|
||||
; SSSE3-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSSE3-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSSE3-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSSE3-NEXT: movq %xmm1, (%rdi)
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
@ -1364,7 +1364,7 @@ define void @trunc_usat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm9
|
||||
; SSE41-NEXT: packusdw %xmm5, %xmm9
|
||||
; SSE41-NEXT: packusdw %xmm9, %xmm1
|
||||
; SSE41-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSE41-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSE41-NEXT: movq %xmm1, (%rdi)
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -1998,7 +1998,7 @@ define void @trunc_usat_v8i32_v8i8_store(<8 x i32> %a0, <8 x i8> *%p1) {
|
||||
; SSE2-NEXT: pandn %xmm2, %xmm5
|
||||
; SSE2-NEXT: por %xmm0, %xmm5
|
||||
; SSE2-NEXT: packuswb %xmm6, %xmm5
|
||||
; SSE2-NEXT: packuswb %xmm5, %xmm5
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm5
|
||||
; SSE2-NEXT: movq %xmm5, (%rdi)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
|
@ -1247,7 +1247,7 @@ define void @trunc_usat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
|
||||
; SSE2-NEXT: por %xmm2, %xmm0
|
||||
; SSE2-NEXT: packuswb %xmm4, %xmm0
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSE2-NEXT: movq %xmm1, (%rdi)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -1310,7 +1310,7 @@ define void @trunc_usat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
|
||||
; SSSE3-NEXT: por %xmm2, %xmm0
|
||||
; SSSE3-NEXT: packuswb %xmm4, %xmm0
|
||||
; SSSE3-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSSE3-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSSE3-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSSE3-NEXT: movq %xmm1, (%rdi)
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
@ -1364,7 +1364,7 @@ define void @trunc_usat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm9
|
||||
; SSE41-NEXT: packusdw %xmm5, %xmm9
|
||||
; SSE41-NEXT: packusdw %xmm9, %xmm1
|
||||
; SSE41-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSE41-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSE41-NEXT: movq %xmm1, (%rdi)
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -1998,7 +1998,7 @@ define void @trunc_usat_v8i32_v8i8_store(<8 x i32> %a0, <8 x i8> *%p1) {
|
||||
; SSE2-NEXT: pandn %xmm2, %xmm5
|
||||
; SSE2-NEXT: por %xmm0, %xmm5
|
||||
; SSE2-NEXT: packuswb %xmm6, %xmm5
|
||||
; SSE2-NEXT: packuswb %xmm5, %xmm5
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm5
|
||||
; SSE2-NEXT: movq %xmm5, (%rdi)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
|
Loading…
Reference in New Issue
Block a user