mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[X86][SSE] truncateVectorWithPACK - upper undef for 128->64 packing
If we're packing from 128 bits to 64 bits, only the lower 64 bits of the PACK result are extracted, so the RHS argument isn't needed and can be undef. This helps with register allocation, especially as we avoid repeating a use of the input value.
This commit is contained in:
parent
bd9ded14fa
commit
7907ee6fb1
@ -20236,7 +20236,7 @@ static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In,
|
||||
InVT = EVT::getVectorVT(Ctx, InVT, 128 / InVT.getSizeInBits());
|
||||
OutVT = EVT::getVectorVT(Ctx, OutVT, 128 / OutVT.getSizeInBits());
|
||||
In = DAG.getBitcast(InVT, In);
|
||||
SDValue Res = DAG.getNode(Opcode, DL, OutVT, In, In);
|
||||
SDValue Res = DAG.getNode(Opcode, DL, OutVT, In, DAG.getUNDEF(InVT));
|
||||
Res = extractSubVector(Res, 0, DAG, DL, 64);
|
||||
return DAG.getBitcast(DstVT, Res);
|
||||
}
|
||||
|
@ -1171,7 +1171,7 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, <8 x i8>* %p, <8 x i32> %mask)
|
||||
; SSE4-NEXT: andpd %xmm0, %xmm6
|
||||
; SSE4-NEXT: packusdw %xmm7, %xmm6
|
||||
; SSE4-NEXT: packusdw %xmm2, %xmm6
|
||||
; SSE4-NEXT: packuswb %xmm6, %xmm6
|
||||
; SSE4-NEXT: packuswb %xmm0, %xmm6
|
||||
; SSE4-NEXT: pcmpeqd %xmm8, %xmm5
|
||||
; SSE4-NEXT: pcmpeqd %xmm0, %xmm0
|
||||
; SSE4-NEXT: pxor %xmm0, %xmm5
|
||||
|
@ -870,7 +870,7 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, <8 x i8>* %p, <8 x i32> %mask)
|
||||
; SSE2-NEXT: por %xmm2, %xmm0
|
||||
; SSE2-NEXT: packuswb %xmm1, %xmm0
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm7
|
||||
; SSE2-NEXT: packuswb %xmm7, %xmm7
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm7
|
||||
; SSE2-NEXT: pcmpeqd %xmm8, %xmm5
|
||||
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
|
||||
; SSE2-NEXT: pxor %xmm0, %xmm5
|
||||
@ -969,7 +969,7 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, <8 x i8>* %p, <8 x i32> %mask)
|
||||
; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6
|
||||
; SSE4-NEXT: packusdw %xmm7, %xmm6
|
||||
; SSE4-NEXT: packusdw %xmm6, %xmm1
|
||||
; SSE4-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSE4-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSE4-NEXT: pcmpeqd %xmm8, %xmm5
|
||||
; SSE4-NEXT: pcmpeqd %xmm0, %xmm0
|
||||
; SSE4-NEXT: pxor %xmm0, %xmm5
|
||||
@ -4364,7 +4364,7 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, <8 x i8>* %p, <8 x i32> %mask)
|
||||
; SSE2-NEXT: pandn %xmm9, %xmm6
|
||||
; SSE2-NEXT: por %xmm0, %xmm6
|
||||
; SSE2-NEXT: packuswb %xmm4, %xmm6
|
||||
; SSE2-NEXT: packuswb %xmm6, %xmm6
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm6
|
||||
; SSE2-NEXT: pcmpeqd %xmm8, %xmm3
|
||||
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
|
||||
; SSE2-NEXT: pxor %xmm0, %xmm3
|
||||
|
@ -3854,7 +3854,7 @@ define <8 x i8> @trunc_packus_v8i64_v8i8(<8 x i64>* %p0) "min-legal-vector-width
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm5
|
||||
; SSE41-NEXT: packusdw %xmm3, %xmm5
|
||||
; SSE41-NEXT: packusdw %xmm5, %xmm1
|
||||
; SSE41-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSE41-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
|
@ -3637,7 +3637,7 @@ define <8 x i8> @trunc_ssat_v8i64_v8i8(<8 x i64>* %p0) "min-legal-vector-width"=
|
||||
; SSE41-NEXT: andpd %xmm0, %xmm1
|
||||
; SSE41-NEXT: packusdw %xmm4, %xmm1
|
||||
; SSE41-NEXT: packusdw %xmm3, %xmm1
|
||||
; SSE41-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSE41-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
|
@ -2781,7 +2781,7 @@ define <8 x i8> @trunc_usat_v8i64_v8i8(<8 x i64>* %p0) {
|
||||
; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm2
|
||||
; SSE41-NEXT: packusdw %xmm4, %xmm2
|
||||
; SSE41-NEXT: packusdw %xmm2, %xmm1
|
||||
; SSE41-NEXT: packuswb %xmm1, %xmm1
|
||||
; SSE41-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -3807,24 +3807,23 @@ define void @trunc_usat_v4i32_v4i8_store(<4 x i32> %a0, <4 x i8> *%p1) {
|
||||
define <8 x i8> @trunc_usat_v8i32_v8i8(<8 x i32> %a0) {
|
||||
; SSE2-LABEL: trunc_usat_v8i32_v8i8:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255]
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm5
|
||||
; SSE2-NEXT: pxor %xmm4, %xmm5
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483903,2147483903,2147483903,2147483903]
|
||||
; SSE2-NEXT: movdqa %xmm2, %xmm6
|
||||
; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255]
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm4
|
||||
; SSE2-NEXT: pxor %xmm3, %xmm4
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147483903,2147483903,2147483903,2147483903]
|
||||
; SSE2-NEXT: movdqa %xmm5, %xmm6
|
||||
; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
|
||||
; SSE2-NEXT: pand %xmm6, %xmm1
|
||||
; SSE2-NEXT: pandn %xmm3, %xmm6
|
||||
; SSE2-NEXT: pandn %xmm2, %xmm6
|
||||
; SSE2-NEXT: por %xmm1, %xmm6
|
||||
; SSE2-NEXT: pxor %xmm0, %xmm4
|
||||
; SSE2-NEXT: pcmpgtd %xmm4, %xmm2
|
||||
; SSE2-NEXT: pand %xmm2, %xmm0
|
||||
; SSE2-NEXT: pandn %xmm3, %xmm2
|
||||
; SSE2-NEXT: por %xmm0, %xmm2
|
||||
; SSE2-NEXT: packuswb %xmm6, %xmm2
|
||||
; SSE2-NEXT: packuswb %xmm2, %xmm2
|
||||
; SSE2-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE2-NEXT: pxor %xmm0, %xmm3
|
||||
; SSE2-NEXT: pcmpgtd %xmm3, %xmm5
|
||||
; SSE2-NEXT: pand %xmm5, %xmm0
|
||||
; SSE2-NEXT: pandn %xmm2, %xmm5
|
||||
; SSE2-NEXT: por %xmm5, %xmm0
|
||||
; SSE2-NEXT: packuswb %xmm6, %xmm0
|
||||
; SSE2-NEXT: packuswb %xmm0, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: trunc_usat_v8i32_v8i8:
|
||||
|
Loading…
x
Reference in New Issue
Block a user