
[SDAG] commute setcc operands to match a subtract

If we have:

R = sub X, Y
P = cmp Y, X

...then flipping the operands in the compare instruction can allow using a subtract that sets compare flags.
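
For instance (an illustrative sketch, not taken from the patch; the function name and comments are mine), C++ source like this produces exactly that reversed sub/cmp pair, and on a target such as AArch64 a single flag-setting subtract ("subs") can then feed both uses:

    // clamp_diff: hypothetical example. The subtract and the compare use the
    // same operands in opposite orders, matching the R/P pattern above.
    int clamp_diff(int x, int y) {
      int r = x - y;            // R = sub x, y
      return (y < x) ? r : 0;   // P = cmp y, x
    }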

Motivated by diffs in D58875 - not sure if this changes anything there,
but this seems like a good thing independent of that.

There's a more involved version of this transform already in IR (in instcombine,
although that seems misplaced to me) - see "swapMayExposeCSEOpportunities()".

Differential Revision: https://reviews.llvm.org/D63958

llvm-svn: 365711
Sanjay Patel 2019-07-10 23:23:54 +00:00
parent 1998d38e13
commit f458c125da
6 changed files with 80 additions and 80 deletions

View File

@@ -2674,6 +2674,17 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
 
+  // If we have a subtract with the same 2 non-constant operands as this setcc
+  // -- but in reverse order -- then try to commute the operands of this setcc
+  // to match. A matching pair of setcc (cmp) and sub may be combined into 1
+  // instruction on some targets.
+  if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) &&
+      (DCI.isBeforeLegalizeOps() ||
+       isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
+      DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N1, N0 } ) &&
+      !DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N0, N1 } ))
+    return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
+
   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
     const APInt &C1 = N1C->getAPIntValue();
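
The second getNodeIfExists call guards against flipping when the DAG also contains the same-order subtract; commuting in that case would expose nothing. A standalone sketch of that decision (my simplification, modeling sub nodes as operand-id pairs rather than the real SelectionDAG API):

    #include <set>
    #include <utility>

    using SubNode = std::pair<int, int>; // (LHS, RHS) value ids of an existing sub

    // Commute the setcc only if sub(N1, N0) exists and sub(N0, N1) does not,
    // so the compare can fold into the existing flag-setting subtract.
    bool shouldCommuteSetCC(const std::set<SubNode> &subs, int n0, int n1) {
      return subs.count({n1, n0}) && !subs.count({n0, n1});
    }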

View File

@@ -1037,15 +1037,14 @@ define void @stack_realign2(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2, i32* %ptr3,
 ; ENABLE-NEXT:    lsl w8, w0, w1
 ; ENABLE-NEXT:    lsl w9, w1, w0
 ; ENABLE-NEXT:    lsr w10, w0, w1
-; ENABLE-NEXT:    lsr w11, w1, w0
-; ENABLE-NEXT:    add w12, w1, w0
-; ENABLE-NEXT:    sub w13, w1, w0
-; ENABLE-NEXT:    cmp w0, w1
-; ENABLE-NEXT:    add w17, w8, w9
-; ENABLE-NEXT:    sub w16, w9, w10
-; ENABLE-NEXT:    add w15, w10, w11
-; ENABLE-NEXT:    add w14, w11, w12
-; ENABLE-NEXT:    b.ge LBB14_2
+; ENABLE-NEXT:    lsr w12, w1, w0
+; ENABLE-NEXT:    add w15, w1, w0
+; ENABLE-NEXT:    subs w17, w1, w0
+; ENABLE-NEXT:    sub w11, w9, w10
+; ENABLE-NEXT:    add w16, w8, w9
+; ENABLE-NEXT:    add w13, w10, w12
+; ENABLE-NEXT:    add w14, w12, w15
+; ENABLE-NEXT:    b.le LBB14_2
 ; ENABLE-NEXT:  ; %bb.1: ; %true
 ; ENABLE-NEXT:    str w0, [sp]
 ; ENABLE-NEXT:    ; InlineAsm Start
@@ -1055,12 +1054,12 @@ define void @stack_realign2(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2, i32* %ptr3,
 ; ENABLE-NEXT:    str w8, [x2]
 ; ENABLE-NEXT:    str w9, [x3]
 ; ENABLE-NEXT:    str w10, [x4]
-; ENABLE-NEXT:    str w11, [x5]
-; ENABLE-NEXT:    str w12, [x6]
-; ENABLE-NEXT:    str w13, [x7]
+; ENABLE-NEXT:    str w12, [x5]
+; ENABLE-NEXT:    str w15, [x6]
+; ENABLE-NEXT:    str w17, [x7]
 ; ENABLE-NEXT:    stp w0, w1, [x2, #4]
-; ENABLE-NEXT:    stp w17, w16, [x2, #12]
-; ENABLE-NEXT:    stp w15, w14, [x2, #20]
+; ENABLE-NEXT:    stp w16, w11, [x2, #12]
+; ENABLE-NEXT:    stp w13, w14, [x2, #20]
 ; ENABLE-NEXT:    sub sp, x29, #80 ; =80
 ; ENABLE-NEXT:    ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
 ; ENABLE-NEXT:    ldp x20, x19, [sp, #64] ; 16-byte Folded Reload
@@ -1097,15 +1096,14 @@ define void @stack_realign2(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2, i32* %ptr3,
 ; DISABLE-NEXT:    lsl w8, w0, w1
 ; DISABLE-NEXT:    lsl w9, w1, w0
 ; DISABLE-NEXT:    lsr w10, w0, w1
-; DISABLE-NEXT:    lsr w11, w1, w0
-; DISABLE-NEXT:    add w12, w1, w0
-; DISABLE-NEXT:    sub w13, w1, w0
-; DISABLE-NEXT:    cmp w0, w1
-; DISABLE-NEXT:    add w17, w8, w9
-; DISABLE-NEXT:    sub w16, w9, w10
-; DISABLE-NEXT:    add w15, w10, w11
-; DISABLE-NEXT:    add w14, w11, w12
-; DISABLE-NEXT:    b.ge LBB14_2
+; DISABLE-NEXT:    lsr w12, w1, w0
+; DISABLE-NEXT:    add w15, w1, w0
+; DISABLE-NEXT:    subs w17, w1, w0
+; DISABLE-NEXT:    sub w11, w9, w10
+; DISABLE-NEXT:    add w16, w8, w9
+; DISABLE-NEXT:    add w13, w10, w12
+; DISABLE-NEXT:    add w14, w12, w15
+; DISABLE-NEXT:    b.le LBB14_2
 ; DISABLE-NEXT:  ; %bb.1: ; %true
 ; DISABLE-NEXT:    str w0, [sp]
 ; DISABLE-NEXT:    ; InlineAsm Start
@@ -1115,12 +1113,12 @@ define void @stack_realign2(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2, i32* %ptr3,
 ; DISABLE-NEXT:    str w8, [x2]
 ; DISABLE-NEXT:    str w9, [x3]
 ; DISABLE-NEXT:    str w10, [x4]
-; DISABLE-NEXT:    str w11, [x5]
-; DISABLE-NEXT:    str w12, [x6]
-; DISABLE-NEXT:    str w13, [x7]
+; DISABLE-NEXT:    str w12, [x5]
+; DISABLE-NEXT:    str w15, [x6]
+; DISABLE-NEXT:    str w17, [x7]
 ; DISABLE-NEXT:    stp w0, w1, [x2, #4]
-; DISABLE-NEXT:    stp w17, w16, [x2, #12]
-; DISABLE-NEXT:    stp w15, w14, [x2, #20]
+; DISABLE-NEXT:    stp w16, w11, [x2, #12]
+; DISABLE-NEXT:    stp w13, w14, [x2, #20]
 ; DISABLE-NEXT:    sub sp, x29, #80 ; =80
 ; DISABLE-NEXT:    ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
 ; DISABLE-NEXT:    ldp x20, x19, [sp, #64] ; 16-byte Folded Reload

View File

@@ -21,11 +21,9 @@ define i1 @usubo_ult_i64(i64 %x, i64 %y, i64* %p) nounwind {
 define i1 @usubo_ugt_i32(i32 %x, i32 %y, i32* %p) nounwind {
 ; CHECK-LABEL: usubo_ugt_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmp w1, w0
-; CHECK-NEXT:    cset w8, hi
-; CHECK-NEXT:    sub w9, w0, w1
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    str w9, [x2]
+; CHECK-NEXT:    subs w8, w0, w1
+; CHECK-NEXT:    cset w0, lo
+; CHECK-NEXT:    str w8, [x2]
 ; CHECK-NEXT:    ret
   %ov = icmp ugt i32 %y, %x
   %s = sub i32 %x, %y
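
This is the usual unsigned-overflow idiom: the borrow out of x - y is exactly y >u x, so after the commute both the difference and the overflow bit come from one "subs". A C++ sketch of what the test computes (the function name is mine):

    // usub_overflow: the compare uses the subtract's operands in reverse order.
    bool usub_overflow(unsigned x, unsigned y, unsigned *p) {
      *p = x - y;     // %s  = sub i32 %x, %y
      return y > x;   // %ov = icmp ugt i32 %y, %x
    }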

View File

@@ -25,7 +25,7 @@ define i32 @g(i32 inreg %a, i32 inreg %b) nounwind ssp {
 ; CHECK-NEXT:    add %sp, 0x8, %fp
 ; CHECK-NEXT:    sub %sp, 0x8, %sp
 ; CHECK-NEXT:    sub.f %r7, %r6, %r3
-; CHECK-NEXT:    sel.lt %r3, %r0, %rv
+; CHECK-NEXT:    sel.gt %r3, %r0, %rv
 ; CHECK-NEXT:    ld -4[%fp], %pc ! return
 ; CHECK-NEXT:    add %fp, 0x0, %sp
 ; CHECK-NEXT:    ld -8[%fp], %fp
@@ -59,7 +59,7 @@ define i32 @i(i32 inreg %a, i32 inreg %b) nounwind readnone ssp {
 ; CHECK-NEXT:    add %sp, 0x8, %fp
 ; CHECK-NEXT:    sub %sp, 0x8, %sp
 ; CHECK-NEXT:    sub.f %r7, %r6, %r3
-; CHECK-NEXT:    sel.ult %r3, %r0, %rv
+; CHECK-NEXT:    sel.ugt %r3, %r0, %rv
 ; CHECK-NEXT:    ld -4[%fp], %pc ! return
 ; CHECK-NEXT:    add %fp, 0x0, %sp
 ; CHECK-NEXT:    ld -8[%fp], %fp
@@ -75,11 +75,11 @@ define i32 @j(i32 inreg %a, i32 inreg %b) nounwind {
 ; CHECK:       ! %bb.0: ! %entry
 ; CHECK-NEXT:    st %fp, [--%sp]
 ; CHECK-NEXT:    add %sp, 0x8, %fp
-; CHECK-NEXT:    sub %sp, 0x8, %sp
-; CHECK-NEXT:    sub.f %r7, %r6, %r0
+; CHECK-NEXT:    sub.f %r6, %r7, %rv
 ; CHECK-NEXT:    bne .LBB4_2
-; CHECK-NEXT:    sub %r6, %r7, %rv
+; CHECK-NEXT:    sub %sp, 0x8, %sp
 ; CHECK-NEXT:  .LBB4_1: ! %if.then
+; CHECK-NEXT:    sub.f %r7, %r6, %r0
 ; CHECK-NEXT:    sel.gt %rv, %r6, %rv
 ; CHECK-NEXT:  .LBB4_2: ! %if.else
 ; CHECK-NEXT:    ld -4[%fp], %pc ! return

View File

@@ -48,11 +48,10 @@ define i32 @func_g(i32 %a, i32 %b) nounwind {
 define i32 @func_h(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: func_h:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    xorl %edx, %edx
-; CHECK-NEXT:    subl %ecx, %eax
-; CHECK-NEXT:    cmovlel %edx, %eax
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    cmovlel %ecx, %eax
 ; CHECK-NEXT:    retl
   %cmp = icmp slt i32 %b, %a
   %sub = sub nsw i32 %a, %b
@@ -91,11 +90,10 @@ define i32 @func_j(i32 %a, i32 %b) nounwind {
 define i32 @func_k(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: func_k:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    xorl %edx, %edx
-; CHECK-NEXT:    subl %ecx, %eax
-; CHECK-NEXT:    cmovbel %edx, %eax
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    cmovbel %ecx, %eax
 ; CHECK-NEXT:    retl
   %cmp = icmp ult i32 %b, %a
   %sub = sub i32 %a, %b
@@ -108,10 +106,9 @@ define i32 @func_l(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: func_l:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; CHECK-NEXT:    movl %edx, %eax
-; CHECK-NEXT:    subl %ecx, %eax
-; CHECK-NEXT:    cmovlel %edx, %eax
+; CHECK-NEXT:    movl %ecx, %eax
+; CHECK-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    cmovlel %ecx, %eax
 ; CHECK-NEXT:    retl
   %cmp = icmp slt i32 %b, %a
   %sub = sub nsw i32 %a, %b
@@ -139,16 +136,14 @@ define i32 @func_l2(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: func_l2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    subl %edx, %ecx
-; CHECK-NEXT:    cmpl %eax, %edx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movl %ecx, %eax
+; CHECK-NEXT:    subl %edx, %eax
 ; CHECK-NEXT:    jne .LBB8_2
 ; CHECK-NEXT:  # %bb.1: # %if.then
-; CHECK-NEXT:    cmovgl %ecx, %eax
-; CHECK-NEXT:    retl
+; CHECK-NEXT:    cmpl %ecx, %edx
+; CHECK-NEXT:    cmovlel %ecx, %eax
 ; CHECK-NEXT:  .LBB8_2: # %if.else
-; CHECK-NEXT:    movl %ecx, %eax
 ; CHECK-NEXT:    retl
   %cmp = icmp eq i32 %b, %a
   %sub = sub nsw i32 %a, %b
@@ -166,9 +161,8 @@ if.else:
 define i32 @func_l3(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: func_l3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    subl %ecx, %eax
+; CHECK-NEXT:    subl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    jge .LBB9_2
 ; CHECK-NEXT:  # %bb.1: # %if.then
 ; CHECK-NEXT:    retl
@@ -192,11 +186,10 @@ if.else:
 define i32 @func_l4(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: func_l4:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    xorl %edx, %edx
-; CHECK-NEXT:    subl %ecx, %eax
-; CHECK-NEXT:    cmovll %edx, %eax
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    cmovll %ecx, %eax
 ; CHECK-NEXT:    retl
   %cmp = icmp sgt i32 %b, %a
   %sub = sub i32 %a, %b

View File

@@ -1038,14 +1038,14 @@ define <8 x i16> @test16(<8 x i16> %x, <8 x i32> %y) nounwind {
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
 ; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
 ; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; SSE41-NEXT:    movdqa %xmm1, %xmm4
-; SSE41-NEXT:    pmaxud %xmm0, %xmm4
-; SSE41-NEXT:    pcmpeqd %xmm1, %xmm4
+; SSE41-NEXT:    movdqa %xmm0, %xmm4
+; SSE41-NEXT:    pminud %xmm1, %xmm4
+; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
 ; SSE41-NEXT:    pcmpeqd %xmm5, %xmm5
 ; SSE41-NEXT:    pxor %xmm5, %xmm4
-; SSE41-NEXT:    movdqa %xmm2, %xmm6
-; SSE41-NEXT:    pmaxud %xmm3, %xmm6
-; SSE41-NEXT:    pcmpeqd %xmm2, %xmm6
+; SSE41-NEXT:    movdqa %xmm3, %xmm6
+; SSE41-NEXT:    pminud %xmm2, %xmm6
+; SSE41-NEXT:    pcmpeqd %xmm3, %xmm6
 ; SSE41-NEXT:    pxor %xmm5, %xmm6
 ; SSE41-NEXT:    packssdw %xmm6, %xmm4
 ; SSE41-NEXT:    psubd %xmm2, %xmm3
@@ -1062,15 +1062,15 @@ define <8 x i16> @test16(<8 x i16> %x, <8 x i32> %y) nounwind {
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
 ; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX1-NEXT:    vpmaxud %xmm0, %xmm1, %xmm3
-; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm3
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT:    vpmaxud %xmm2, %xmm4, %xmm5
-; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm4, %xmm5
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vpminud %xmm3, %xmm2, %xmm4
+; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm2, %xmm4
+; AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm5
+; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm0, %xmm5
 ; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpandn %xmm0, %xmm3, %xmm0
-; AVX1-NEXT:    vpsubd %xmm4, %xmm2, %xmm1
-; AVX1-NEXT:    vpandn %xmm1, %xmm5, %xmm1
+; AVX1-NEXT:    vpandn %xmm0, %xmm5, %xmm0
+; AVX1-NEXT:    vpsubd %xmm3, %xmm2, %xmm1
+; AVX1-NEXT:    vpandn %xmm1, %xmm4, %xmm1
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
@@ -1081,8 +1081,8 @@ define <8 x i16> @test16(<8 x i16> %x, <8 x i32> %y) nounwind {
 ; AVX2-LABEL: test16:
 ; AVX2:       # %bb.0: # %vector.ph
 ; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX2-NEXT:    vpmaxud %ymm0, %ymm1, %ymm2
-; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm2
+; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm2
+; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm0, %ymm2
 ; AVX2-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpandn %ymm0, %ymm2, %ymm0
 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
@@ -1094,7 +1094,7 @@ define <8 x i16> @test16(<8 x i16> %x, <8 x i32> %y) nounwind {
 ; AVX512-LABEL: test16:
 ; AVX512:       # %bb.0: # %vector.ph
 ; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX512-NEXT:    vpcmpltud %ymm0, %ymm1, %k1
+; AVX512-NEXT:    vpcmpnleud %ymm1, %ymm0, %k1
 ; AVX512-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    vpmovdw %ymm0, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    vzeroupper
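
The vector diffs above follow the same theme: test16 appears to compute an unsigned saturating subtract, and the mask (previously built with pmaxud/vpcmpltud) now compares in the operand order that matches the psubd. A scalar C++ model of that pattern (my paraphrase of the test, not its literal IR):

    // Scalar model: keep the subtract only when it cannot wrap below zero.
    unsigned usub_sat(unsigned x, unsigned y) {
      return x > y ? x - y : 0; // cmp and sub share operands in the same order
    }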