[X86] Improve lowering of v2i64 sign bit tests on pre-sse4.2 targets
Without SSE4.2, a v2i64 setlt needs to expand into a pcmpgtd, a pcmpeqd, 3 shuffles, and 2 logic ops. But if we're only interested in the sign bit of the i64 elements, we can just use one pcmpgtd and shuffle the odd elements over the even elements.

Differential Revision: https://reviews.llvm.org/D72302
commit 8548edec3f
parent f330417d5f
@@ -21584,6 +21584,19 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
  if (Opc == X86ISD::PCMPGT && !Subtarget.hasSSE42()) {
    assert(Subtarget.hasSSE2() && "Don't know how to lower!");

    // Special case for sign bit test. We can use a v4i32 PCMPGT and shuffle
    // the odd elements over the even elements.
    if (!FlipSigns && !Invert && ISD::isBuildVectorAllZeros(Op0.getNode())) {
      Op0 = DAG.getConstant(0, dl, MVT::v4i32);
      Op1 = DAG.getBitcast(MVT::v4i32, Op1);

      SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
      static const int MaskHi[] = { 1, 1, 3, 3 };
      SDValue Result = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);

      return DAG.getBitcast(VT, Result);
    }

    // Since SSE has no unsigned integer comparisons, we need to flip the sign
    // bits of the inputs before performing those operations. The lower
    // compare is always unsigned.
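The MaskHi shuffle { 1, 1, 3, 3 } copies each odd (high) 32-bit element over its even neighbour, so after the v4i32 PCMPGT against zero every 64-bit lane ends up all-ones exactly when its sign bit was set. As a sanity check of that equivalence, here is a small standalone C++ sketch, not part of the patch, using SSE2 intrinsics and made-up helper names, that compares the shortened sequence against a scalar reference:

// Standalone check of the sign-bit-test equivalence (hypothetical helper
// names; compile for x86 with SSE2, e.g. clang++ -O2 -msse2 check.cpp).
#include <emmintrin.h>  // SSE2 intrinsics
#include <cstdint>
#include <cstdio>

// Reference: the mask a v2i64 "setlt x, 0" should produce.
static void ref_sign_mask(const int64_t x[2], uint64_t out[2]) {
  out[0] = x[0] < 0 ? ~0ULL : 0;
  out[1] = x[1] < 0 ? ~0ULL : 0;
}

// Mirrors the new lowering: PCMPGTD(0, x), then PSHUFD with mask {1,1,3,3}
// to copy each odd (high) 32-bit result over the even element below it.
static __m128i sse2_sign_mask(__m128i x) {
  __m128i gt = _mm_cmpgt_epi32(_mm_setzero_si128(), x);
  return _mm_shuffle_epi32(gt, _MM_SHUFFLE(3, 3, 1, 1));
}

int main() {
  const int64_t tests[][2] = {{1, -1}, {-2, 3}, {0, INT64_MIN}, {INT64_MAX, -9}};
  for (const auto &t : tests) {
    __m128i v = _mm_set_epi64x(t[1], t[0]);  // element 1 = t[1], element 0 = t[0]
    uint64_t got[2], want[2];
    _mm_storeu_si128(reinterpret_cast<__m128i *>(got), sse2_sign_mask(v));
    ref_sign_mask(t, want);
    bool ok = got[0] == want[0] && got[1] == want[1];
    std::printf("%s for {%lld, %lld}\n", ok ? "match" : "MISMATCH",
                (long long)t[0], (long long)t[1]);
  }
  return 0;
}

The guard in the patch (!FlipSigns && !Invert && isBuildVectorAllZeros(Op0)) restricts the shortcut to exactly this case: a PCMPGT whose left operand is all zeros, i.e. 0 > x, a pure sign-bit test.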
@@ -429,48 +429,11 @@ define i16 @bitcast_v32i8_to_v2i16(<32 x i8> %a0) nounwind {
define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v8i64_to_v2i4:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm3
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm5
; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm5
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm3
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-SSSE3-NEXT: pand %xmm6, %xmm3
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
; SSE2-SSSE3-NEXT: por %xmm3, %xmm5
; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm2
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm3
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
; SSE2-SSSE3-NEXT: pand %xmm6, %xmm7
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; SSE2-SSSE3-NEXT: por %xmm7, %xmm2
; SSE2-SSSE3-NEXT: packssdw %xmm5, %xmm2
; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm1
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-SSSE3-NEXT: por %xmm1, %xmm3
; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm0
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm1
; SSE2-SSSE3-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT: por %xmm0, %xmm1
; SSE2-SSSE3-NEXT: packssdw %xmm3, %xmm1
; SSE2-SSSE3-NEXT: packssdw %xmm2, %xmm1
; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pmovmskb %xmm1, %eax
; SSE2-SSSE3-NEXT: packssdw %xmm3, %xmm2
; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
; SSE2-SSSE3-NEXT: packssdw %xmm2, %xmm0
; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT: movzbl %al, %ecx
; SSE2-SSSE3-NEXT: shrl $4, %ecx
; SSE2-SSSE3-NEXT: movq %rcx, %xmm0
@@ -1015,48 +1015,11 @@ define i1 @allzeros_v4i64_sign(<4 x i64> %arg) {
define i1 @allones_v8i64_sign(<8 x i64> %arg) {
; SSE2-LABEL: allones_v8i64_sign:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
; SSE2-NEXT: pxor %xmm4, %xmm3
; SSE2-NEXT: movdqa %xmm4, %xmm5
; SSE2-NEXT: pcmpgtd %xmm3, %xmm5
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm4, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-NEXT: pand %xmm6, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
; SSE2-NEXT: por %xmm3, %xmm5
; SSE2-NEXT: pxor %xmm4, %xmm2
; SSE2-NEXT: movdqa %xmm4, %xmm3
; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
; SSE2-NEXT: pand %xmm6, %xmm7
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; SSE2-NEXT: por %xmm7, %xmm2
; SSE2-NEXT: packssdw %xmm5, %xmm2
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: movdqa %xmm4, %xmm3
; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pand %xmm5, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-NEXT: por %xmm1, %xmm3
; SSE2-NEXT: pxor %xmm4, %xmm0
; SSE2-NEXT: movdqa %xmm4, %xmm1
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: pand %xmm5, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: por %xmm0, %xmm1
; SSE2-NEXT: packssdw %xmm3, %xmm1
; SSE2-NEXT: packssdw %xmm2, %xmm1
; SSE2-NEXT: packsswb %xmm0, %xmm1
; SSE2-NEXT: pmovmskb %xmm1, %eax
; SSE2-NEXT: packssdw %xmm3, %xmm2
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: packssdw %xmm2, %xmm0
; SSE2-NEXT: packsswb %xmm0, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: cmpb $-1, %al
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
@@ -1113,48 +1076,11 @@ define i1 @allones_v8i64_sign(<8 x i64> %arg) {
define i1 @allzeros_v8i64_sign(<8 x i64> %arg) {
; SSE2-LABEL: allzeros_v8i64_sign:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
; SSE2-NEXT: pxor %xmm4, %xmm3
; SSE2-NEXT: movdqa %xmm4, %xmm5
; SSE2-NEXT: pcmpgtd %xmm3, %xmm5
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm4, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-NEXT: pand %xmm6, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
; SSE2-NEXT: por %xmm3, %xmm5
; SSE2-NEXT: pxor %xmm4, %xmm2
; SSE2-NEXT: movdqa %xmm4, %xmm3
; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
; SSE2-NEXT: pand %xmm6, %xmm7
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; SSE2-NEXT: por %xmm7, %xmm2
; SSE2-NEXT: packssdw %xmm5, %xmm2
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: movdqa %xmm4, %xmm3
; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pand %xmm5, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-NEXT: por %xmm1, %xmm3
; SSE2-NEXT: pxor %xmm4, %xmm0
; SSE2-NEXT: movdqa %xmm4, %xmm1
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: pand %xmm5, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: por %xmm0, %xmm1
; SSE2-NEXT: packssdw %xmm3, %xmm1
; SSE2-NEXT: packssdw %xmm2, %xmm1
; SSE2-NEXT: packsswb %xmm0, %xmm1
; SSE2-NEXT: pmovmskb %xmm1, %eax
; SSE2-NEXT: packssdw %xmm3, %xmm2
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: packssdw %xmm2, %xmm0
; SSE2-NEXT: packsswb %xmm0, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: testb %al, %al
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
Two file diffs suppressed because they are too large
@@ -810,27 +810,22 @@ define <2 x i32> @saddo_v2i64(<2 x i64> %a0, <2 x i64> %a1, <2 x i64>* %p2) nounwind {
; SSE-NEXT: movdqa %xmm0, %xmm3
; SSE-NEXT: pxor %xmm2, %xmm3
; SSE-NEXT: paddq %xmm1, %xmm0
; SSE-NEXT: movdqa %xmm0, (%rdi)
; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: pxor %xmm0, %xmm2
; SSE-NEXT: movdqa %xmm3, %xmm4
; SSE-NEXT: pcmpgtd %xmm0, %xmm4
; SSE-NEXT: pcmpgtd %xmm2, %xmm4
; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE-NEXT: pcmpeqd %xmm3, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE-NEXT: pand %xmm5, %xmm0
; SSE-NEXT: pcmpeqd %xmm3, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE-NEXT: pand %xmm5, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; SSE-NEXT: por %xmm0, %xmm3
; SSE-NEXT: pxor %xmm2, %xmm1
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: pcmpgtd %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2]
; SSE-NEXT: pcmpeqd %xmm2, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE-NEXT: pand %xmm4, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE-NEXT: por %xmm1, %xmm0
; SSE-NEXT: pxor %xmm3, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT: por %xmm2, %xmm3
; SSE-NEXT: pxor %xmm2, %xmm2
; SSE-NEXT: pcmpgtd %xmm1, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE-NEXT: pxor %xmm3, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT: movdqa %xmm0, (%rdi)
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: saddo_v2i64: