
[x86] avoid flipping sign bits for vector icmp by using known bits

If we know that both operands of an unsigned integer vector comparison are non-negative, 
then it's safe to directly use a signed-compare-greater-than instruction (the only non-equality
integer vector compare predicate provided by SSE/AVX).
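
As a concrete illustration, this is the kind of IR that benefits (the ugt_v4i32 case from the test diff below): both operands are logically shifted right by one, so their sign bits are known to be zero and the unsigned compare can be lowered with the signed pcmpgtd as-is.

define <4 x i1> @ugt_v4i32(<4 x i32> %x, <4 x i32> %y) {
  %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
  %cmp = icmp ugt <4 x i32> %sh1, %sh2
  ret <4 x i1> %cmp
}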

We're intentionally not changing the condition code to signed in order to preserve the
existing transforms that use min/max/psubus further down in this lowering.

This should solve PR33276:
https://bugs.llvm.org/show_bug.cgi?id=33276
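
Concretely, the updated AVX checks for ugt_v4i32 below show the sign-bit splat constant and both vpxor flips disappearing, leaving only the shifts and the signed compare (AVX1 checks shown for the "before" case):

; Before this patch:
;   vpsrld $1, %xmm0, %xmm0
;   vpsrld $1, %xmm1, %xmm1
;   vmovdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
;   vpxor %xmm2, %xmm1, %xmm1
;   vpxor %xmm2, %xmm0, %xmm0
;   vpcmpgtd %xmm1, %xmm0, %xmm0
;
; After this patch:
;   vpsrld $1, %xmm0, %xmm0
;   vpsrld $1, %xmm1, %xmm1
;   vpcmpgtd %xmm1, %xmm0, %xmm0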

Differential Revision: https://reviews.llvm.org/D33862

llvm-svn: 304909
Sanjay Patel 2017-06-07 13:46:34 +00:00
parent ff912dafed
commit 6d2c5742d0
3 changed files with 41 additions and 105 deletions


@ -17184,7 +17184,13 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
Cond == ISD::SETGE || Cond == ISD::SETUGE;
bool Invert = Cond == ISD::SETNE ||
(Cond != ISD::SETEQ && ISD::isTrueWhenEqual(Cond));
bool FlipSigns = ISD::isUnsignedIntSetCC(Cond);
// If both operands are known non-negative, then an unsigned compare is the
// same as a signed compare and there's no need to flip signbits.
// TODO: We could check for more general simplifications here since we're
// computing known bits.
bool FlipSigns = ISD::isUnsignedIntSetCC(Cond) &&
!(DAG.SignBitIsZero(Op0) && DAG.SignBitIsZero(Op1));
// Special case: Use min/max operations for SETULE/SETUGE
MVT VET = VT.getVectorElementType();


@ -1549,8 +1549,6 @@ define <2 x float> @uitofp_2i1_float(<2 x i32> %a) {
; NOVL: # BB#0:
; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; NOVL-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; NOVL-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NOVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NOVL-NEXT: vpextrb $8, %xmm0, %eax
; NOVL-NEXT: andl $1, %eax
@ -1579,8 +1577,6 @@ define <2 x double> @uitofp_2i1_double(<2 x i32> %a) {
; NOVL: # BB#0:
; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; NOVL-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; NOVL-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NOVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NOVL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; NOVL-NEXT: retq


@ -13,7 +13,7 @@ define <2 x i1> @ugt_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE: # BB#0:
; SSE-NEXT: psrlq $1, %xmm0
; SSE-NEXT: psrlq $1, %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; SSE-NEXT: pxor %xmm2, %xmm1
; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm2
@ -30,9 +30,6 @@ define <2 x i1> @ugt_v2i64(<2 x i64> %x, <2 x i64> %y) {
; AVX: # BB#0:
; AVX-NEXT: vpsrlq $1, %xmm0, %xmm0
; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpor %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
@ -46,7 +43,7 @@ define <2 x i1> @ult_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE: # BB#0:
; SSE-NEXT: psrlq $1, %xmm0
; SSE-NEXT: psrlq $1, %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: pxor %xmm2, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm2
@ -63,9 +60,6 @@ define <2 x i1> @ult_v2i64(<2 x i64> %x, <2 x i64> %y) {
; AVX: # BB#0:
; AVX-NEXT: vpsrlq $1, %xmm0, %xmm0
; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX-NEXT: vpor %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
@ -79,7 +73,7 @@ define <2 x i1> @uge_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE: # BB#0:
; SSE-NEXT: psrlq $1, %xmm0
; SSE-NEXT: psrlq $1, %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: pxor %xmm2, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm2
@ -98,9 +92,6 @@ define <2 x i1> @uge_v2i64(<2 x i64> %x, <2 x i64> %y) {
; AVX: # BB#0:
; AVX-NEXT: vpsrlq $1, %xmm0, %xmm0
; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX-NEXT: vpor %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
@ -116,7 +107,7 @@ define <2 x i1> @ule_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE: # BB#0:
; SSE-NEXT: psrlq $1, %xmm0
; SSE-NEXT: psrlq $1, %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; SSE-NEXT: pxor %xmm2, %xmm1
; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm2
@ -135,9 +126,6 @@ define <2 x i1> @ule_v2i64(<2 x i64> %x, <2 x i64> %y) {
; AVX: # BB#0:
; AVX-NEXT: vpsrlq $1, %xmm0, %xmm0
; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpor %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
@ -153,31 +141,15 @@ define <4 x i1> @ugt_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE: # BB#0:
; SSE-NEXT: psrld $1, %xmm0
; SSE-NEXT: psrld $1, %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE-NEXT: pxor %xmm2, %xmm1
; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: pcmpgtd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: ugt_v4i32:
; AVX1: # BB#0:
; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0
; AVX1-NEXT: vpsrld $1, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_v4i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpsrld $1, %xmm0, %xmm0
; AVX2-NEXT: vpsrld $1, %xmm1, %xmm1
; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
; AVX-LABEL: ugt_v4i32:
; AVX: # BB#0:
; AVX-NEXT: vpsrld $1, %xmm0, %xmm0
; AVX-NEXT: vpsrld $1, %xmm1, %xmm1
; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
%sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
%cmp = icmp ugt <4 x i32> %sh1, %sh2
@ -189,32 +161,16 @@ define <4 x i1> @ult_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE: # BB#0:
; SSE-NEXT: psrld $1, %xmm0
; SSE-NEXT: psrld $1, %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: pxor %xmm1, %xmm2
; SSE-NEXT: pcmpgtd %xmm0, %xmm2
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: pcmpgtd %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: ult_v4i32:
; AVX1: # BB#0:
; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0
; AVX1-NEXT: vpsrld $1, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ult_v4i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpsrld $1, %xmm0, %xmm0
; AVX2-NEXT: vpsrld $1, %xmm1, %xmm1
; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
; AVX-LABEL: ult_v4i32:
; AVX: # BB#0:
; AVX-NEXT: vpsrld $1, %xmm0, %xmm0
; AVX-NEXT: vpsrld $1, %xmm1, %xmm1
; AVX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
%sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
%cmp = icmp ult <4 x i32> %sh1, %sh2
@ -226,12 +182,9 @@ define <4 x i1> @uge_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE2: # BB#0:
; SSE2-NEXT: psrld $1, %xmm0
; SSE2-NEXT: psrld $1, %xmm1
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm2
; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: uge_v4i32:
@ -260,9 +213,6 @@ define <4 x i1> @ule_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE2: # BB#0:
; SSE2-NEXT: psrld $1, %xmm0
; SSE2-NEXT: psrld $1, %xmm1
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm1, %xmm0
@ -294,9 +244,6 @@ define <8 x i1> @ugt_v8i16(<8 x i16> %x, <8 x i16> %y) {
; SSE: # BB#0:
; SSE-NEXT: psrlw $1, %xmm0
; SSE-NEXT: psrlw $1, %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE-NEXT: pxor %xmm2, %xmm1
; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: pcmpgtw %xmm1, %xmm0
; SSE-NEXT: retq
;
@ -304,9 +251,6 @@ define <8 x i1> @ugt_v8i16(<8 x i16> %x, <8 x i16> %y) {
; AVX: # BB#0:
; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@ -320,20 +264,14 @@ define <8 x i1> @ult_v8i16(<8 x i16> %x, <8 x i16> %y) {
; SSE: # BB#0:
; SSE-NEXT: psrlw $1, %xmm0
; SSE-NEXT: psrlw $1, %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: pxor %xmm1, %xmm2
; SSE-NEXT: pcmpgtw %xmm0, %xmm2
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: pcmpgtw %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: ult_v8i16:
; AVX: # BB#0:
; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@ -408,22 +346,20 @@ define <16 x i1> @ugt_v16i8(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: ugt_v16i8:
; SSE: # BB#0:
; SSE-NEXT: psrlw $1, %xmm0
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: psrlw $1, %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; SSE-NEXT: por %xmm2, %xmm1
; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pcmpgtb %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: ugt_v16i8:
; AVX: # BB#0:
; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@ -436,11 +372,10 @@ define <16 x i1> @ult_v16i8(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: ult_v16i8:
; SSE: # BB#0:
; SSE-NEXT: psrlw $1, %xmm0
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: psrlw $1, %xmm1
; SSE-NEXT: pand {{.*}}(%rip), %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; SSE-NEXT: por %xmm2, %xmm0
; SSE-NEXT: pxor %xmm1, %xmm2
; SSE-NEXT: pand %xmm1, %xmm2
; SSE-NEXT: pcmpgtb %xmm0, %xmm2
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: retq
@ -448,11 +383,10 @@ define <16 x i1> @ult_v16i8(<16 x i8> %x, <16 x i8> %y) {
; AVX-LABEL: ult_v16i8:
; AVX: # BB#0:
; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX-NEXT: vpor %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>