mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-18 18:42:46 +02:00
[SDAG] commute setcc operands to match a subtract

If we have:
  R = sub X, Y
  P = cmp Y, X
...then flipping the operands in the compare instruction can allow using a
subtract that sets compare flags.

Motivated by diffs in D58875 — not sure if this changes anything there, but
this seems like a good thing independent of that.

There's a more involved version of this transform already in IR (in
instcombine, although that seems misplaced to me) — see
"swapMayExposeCSEOpportunities()".

Differential Revision: https://reviews.llvm.org/D63958

llvm-svn: 365711
This commit is contained in:
parent
1998d38e13
commit
f458c125da
@ -2674,6 +2674,17 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
|
||||
isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
|
||||
return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
|
||||
|
||||
// If we have a subtract with the same 2 non-constant operands as this setcc
|
||||
// -- but in reverse order -- then try to commute the operands of this setcc
|
||||
// to match. A matching pair of setcc (cmp) and sub may be combined into 1
|
||||
// instruction on some targets.
|
||||
if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) &&
|
||||
(DCI.isBeforeLegalizeOps() ||
|
||||
isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
|
||||
DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N1, N0 } ) &&
|
||||
!DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N0, N1 } ))
|
||||
return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
|
||||
|
||||
if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
|
||||
const APInt &C1 = N1C->getAPIntValue();
|
||||
|
||||
|
@ -1037,15 +1037,14 @@ define void @stack_realign2(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2, i32* %ptr3,
|
||||
; ENABLE-NEXT: lsl w8, w0, w1
|
||||
; ENABLE-NEXT: lsl w9, w1, w0
|
||||
; ENABLE-NEXT: lsr w10, w0, w1
|
||||
; ENABLE-NEXT: lsr w11, w1, w0
|
||||
; ENABLE-NEXT: add w12, w1, w0
|
||||
; ENABLE-NEXT: sub w13, w1, w0
|
||||
; ENABLE-NEXT: cmp w0, w1
|
||||
; ENABLE-NEXT: add w17, w8, w9
|
||||
; ENABLE-NEXT: sub w16, w9, w10
|
||||
; ENABLE-NEXT: add w15, w10, w11
|
||||
; ENABLE-NEXT: add w14, w11, w12
|
||||
; ENABLE-NEXT: b.ge LBB14_2
|
||||
; ENABLE-NEXT: lsr w12, w1, w0
|
||||
; ENABLE-NEXT: add w15, w1, w0
|
||||
; ENABLE-NEXT: subs w17, w1, w0
|
||||
; ENABLE-NEXT: sub w11, w9, w10
|
||||
; ENABLE-NEXT: add w16, w8, w9
|
||||
; ENABLE-NEXT: add w13, w10, w12
|
||||
; ENABLE-NEXT: add w14, w12, w15
|
||||
; ENABLE-NEXT: b.le LBB14_2
|
||||
; ENABLE-NEXT: ; %bb.1: ; %true
|
||||
; ENABLE-NEXT: str w0, [sp]
|
||||
; ENABLE-NEXT: ; InlineAsm Start
|
||||
@ -1055,12 +1054,12 @@ define void @stack_realign2(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2, i32* %ptr3,
|
||||
; ENABLE-NEXT: str w8, [x2]
|
||||
; ENABLE-NEXT: str w9, [x3]
|
||||
; ENABLE-NEXT: str w10, [x4]
|
||||
; ENABLE-NEXT: str w11, [x5]
|
||||
; ENABLE-NEXT: str w12, [x6]
|
||||
; ENABLE-NEXT: str w13, [x7]
|
||||
; ENABLE-NEXT: str w12, [x5]
|
||||
; ENABLE-NEXT: str w15, [x6]
|
||||
; ENABLE-NEXT: str w17, [x7]
|
||||
; ENABLE-NEXT: stp w0, w1, [x2, #4]
|
||||
; ENABLE-NEXT: stp w17, w16, [x2, #12]
|
||||
; ENABLE-NEXT: stp w15, w14, [x2, #20]
|
||||
; ENABLE-NEXT: stp w16, w11, [x2, #12]
|
||||
; ENABLE-NEXT: stp w13, w14, [x2, #20]
|
||||
; ENABLE-NEXT: sub sp, x29, #80 ; =80
|
||||
; ENABLE-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
|
||||
; ENABLE-NEXT: ldp x20, x19, [sp, #64] ; 16-byte Folded Reload
|
||||
@ -1097,15 +1096,14 @@ define void @stack_realign2(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2, i32* %ptr3,
|
||||
; DISABLE-NEXT: lsl w8, w0, w1
|
||||
; DISABLE-NEXT: lsl w9, w1, w0
|
||||
; DISABLE-NEXT: lsr w10, w0, w1
|
||||
; DISABLE-NEXT: lsr w11, w1, w0
|
||||
; DISABLE-NEXT: add w12, w1, w0
|
||||
; DISABLE-NEXT: sub w13, w1, w0
|
||||
; DISABLE-NEXT: cmp w0, w1
|
||||
; DISABLE-NEXT: add w17, w8, w9
|
||||
; DISABLE-NEXT: sub w16, w9, w10
|
||||
; DISABLE-NEXT: add w15, w10, w11
|
||||
; DISABLE-NEXT: add w14, w11, w12
|
||||
; DISABLE-NEXT: b.ge LBB14_2
|
||||
; DISABLE-NEXT: lsr w12, w1, w0
|
||||
; DISABLE-NEXT: add w15, w1, w0
|
||||
; DISABLE-NEXT: subs w17, w1, w0
|
||||
; DISABLE-NEXT: sub w11, w9, w10
|
||||
; DISABLE-NEXT: add w16, w8, w9
|
||||
; DISABLE-NEXT: add w13, w10, w12
|
||||
; DISABLE-NEXT: add w14, w12, w15
|
||||
; DISABLE-NEXT: b.le LBB14_2
|
||||
; DISABLE-NEXT: ; %bb.1: ; %true
|
||||
; DISABLE-NEXT: str w0, [sp]
|
||||
; DISABLE-NEXT: ; InlineAsm Start
|
||||
@ -1115,12 +1113,12 @@ define void @stack_realign2(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2, i32* %ptr3,
|
||||
; DISABLE-NEXT: str w8, [x2]
|
||||
; DISABLE-NEXT: str w9, [x3]
|
||||
; DISABLE-NEXT: str w10, [x4]
|
||||
; DISABLE-NEXT: str w11, [x5]
|
||||
; DISABLE-NEXT: str w12, [x6]
|
||||
; DISABLE-NEXT: str w13, [x7]
|
||||
; DISABLE-NEXT: str w12, [x5]
|
||||
; DISABLE-NEXT: str w15, [x6]
|
||||
; DISABLE-NEXT: str w17, [x7]
|
||||
; DISABLE-NEXT: stp w0, w1, [x2, #4]
|
||||
; DISABLE-NEXT: stp w17, w16, [x2, #12]
|
||||
; DISABLE-NEXT: stp w15, w14, [x2, #20]
|
||||
; DISABLE-NEXT: stp w16, w11, [x2, #12]
|
||||
; DISABLE-NEXT: stp w13, w14, [x2, #20]
|
||||
; DISABLE-NEXT: sub sp, x29, #80 ; =80
|
||||
; DISABLE-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
|
||||
; DISABLE-NEXT: ldp x20, x19, [sp, #64] ; 16-byte Folded Reload
|
||||
|
@ -21,11 +21,9 @@ define i1 @usubo_ult_i64(i64 %x, i64 %y, i64* %p) nounwind {
|
||||
define i1 @usubo_ugt_i32(i32 %x, i32 %y, i32* %p) nounwind {
|
||||
; CHECK-LABEL: usubo_ugt_i32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: cmp w1, w0
|
||||
; CHECK-NEXT: cset w8, hi
|
||||
; CHECK-NEXT: sub w9, w0, w1
|
||||
; CHECK-NEXT: mov w0, w8
|
||||
; CHECK-NEXT: str w9, [x2]
|
||||
; CHECK-NEXT: subs w8, w0, w1
|
||||
; CHECK-NEXT: cset w0, lo
|
||||
; CHECK-NEXT: str w8, [x2]
|
||||
; CHECK-NEXT: ret
|
||||
%ov = icmp ugt i32 %y, %x
|
||||
%s = sub i32 %x, %y
|
||||
|
@ -25,7 +25,7 @@ define i32 @g(i32 inreg %a, i32 inreg %b) nounwind ssp {
|
||||
; CHECK-NEXT: add %sp, 0x8, %fp
|
||||
; CHECK-NEXT: sub %sp, 0x8, %sp
|
||||
; CHECK-NEXT: sub.f %r7, %r6, %r3
|
||||
; CHECK-NEXT: sel.lt %r3, %r0, %rv
|
||||
; CHECK-NEXT: sel.gt %r3, %r0, %rv
|
||||
; CHECK-NEXT: ld -4[%fp], %pc ! return
|
||||
; CHECK-NEXT: add %fp, 0x0, %sp
|
||||
; CHECK-NEXT: ld -8[%fp], %fp
|
||||
@ -59,7 +59,7 @@ define i32 @i(i32 inreg %a, i32 inreg %b) nounwind readnone ssp {
|
||||
; CHECK-NEXT: add %sp, 0x8, %fp
|
||||
; CHECK-NEXT: sub %sp, 0x8, %sp
|
||||
; CHECK-NEXT: sub.f %r7, %r6, %r3
|
||||
; CHECK-NEXT: sel.ult %r3, %r0, %rv
|
||||
; CHECK-NEXT: sel.ugt %r3, %r0, %rv
|
||||
; CHECK-NEXT: ld -4[%fp], %pc ! return
|
||||
; CHECK-NEXT: add %fp, 0x0, %sp
|
||||
; CHECK-NEXT: ld -8[%fp], %fp
|
||||
@ -75,11 +75,11 @@ define i32 @j(i32 inreg %a, i32 inreg %b) nounwind {
|
||||
; CHECK: ! %bb.0: ! %entry
|
||||
; CHECK-NEXT: st %fp, [--%sp]
|
||||
; CHECK-NEXT: add %sp, 0x8, %fp
|
||||
; CHECK-NEXT: sub %sp, 0x8, %sp
|
||||
; CHECK-NEXT: sub.f %r7, %r6, %r0
|
||||
; CHECK-NEXT: sub.f %r6, %r7, %rv
|
||||
; CHECK-NEXT: bne .LBB4_2
|
||||
; CHECK-NEXT: sub %r6, %r7, %rv
|
||||
; CHECK-NEXT: sub %sp, 0x8, %sp
|
||||
; CHECK-NEXT: .LBB4_1: ! %if.then
|
||||
; CHECK-NEXT: sub.f %r7, %r6, %r0
|
||||
; CHECK-NEXT: sel.gt %rv, %r6, %rv
|
||||
; CHECK-NEXT: .LBB4_2: ! %if.else
|
||||
; CHECK-NEXT: ld -4[%fp], %pc ! return
|
||||
|
@ -48,11 +48,10 @@ define i32 @func_g(i32 %a, i32 %b) nounwind {
|
||||
define i32 @func_h(i32 %a, i32 %b) nounwind {
|
||||
; CHECK-LABEL: func_h:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: xorl %edx, %edx
|
||||
; CHECK-NEXT: subl %ecx, %eax
|
||||
; CHECK-NEXT: cmovlel %edx, %eax
|
||||
; CHECK-NEXT: xorl %ecx, %ecx
|
||||
; CHECK-NEXT: subl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: cmovlel %ecx, %eax
|
||||
; CHECK-NEXT: retl
|
||||
%cmp = icmp slt i32 %b, %a
|
||||
%sub = sub nsw i32 %a, %b
|
||||
@ -91,11 +90,10 @@ define i32 @func_j(i32 %a, i32 %b) nounwind {
|
||||
define i32 @func_k(i32 %a, i32 %b) nounwind {
|
||||
; CHECK-LABEL: func_k:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: xorl %edx, %edx
|
||||
; CHECK-NEXT: subl %ecx, %eax
|
||||
; CHECK-NEXT: cmovbel %edx, %eax
|
||||
; CHECK-NEXT: xorl %ecx, %ecx
|
||||
; CHECK-NEXT: subl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: cmovbel %ecx, %eax
|
||||
; CHECK-NEXT: retl
|
||||
%cmp = icmp ult i32 %b, %a
|
||||
%sub = sub i32 %a, %b
|
||||
@ -108,10 +106,9 @@ define i32 @func_l(i32 %a, i32 %b) nounwind {
|
||||
; CHECK-LABEL: func_l:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; CHECK-NEXT: movl %edx, %eax
|
||||
; CHECK-NEXT: subl %ecx, %eax
|
||||
; CHECK-NEXT: cmovlel %edx, %eax
|
||||
; CHECK-NEXT: movl %ecx, %eax
|
||||
; CHECK-NEXT: subl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: cmovlel %ecx, %eax
|
||||
; CHECK-NEXT: retl
|
||||
%cmp = icmp slt i32 %b, %a
|
||||
%sub = sub nsw i32 %a, %b
|
||||
@ -139,16 +136,14 @@ define i32 @func_l2(i32 %a, i32 %b) nounwind {
|
||||
; CHECK-LABEL: func_l2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: movl %eax, %ecx
|
||||
; CHECK-NEXT: subl %edx, %ecx
|
||||
; CHECK-NEXT: cmpl %eax, %edx
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; CHECK-NEXT: movl %ecx, %eax
|
||||
; CHECK-NEXT: subl %edx, %eax
|
||||
; CHECK-NEXT: jne .LBB8_2
|
||||
; CHECK-NEXT: # %bb.1: # %if.then
|
||||
; CHECK-NEXT: cmovgl %ecx, %eax
|
||||
; CHECK-NEXT: retl
|
||||
; CHECK-NEXT: cmpl %ecx, %edx
|
||||
; CHECK-NEXT: cmovlel %ecx, %eax
|
||||
; CHECK-NEXT: .LBB8_2: # %if.else
|
||||
; CHECK-NEXT: movl %ecx, %eax
|
||||
; CHECK-NEXT: retl
|
||||
%cmp = icmp eq i32 %b, %a
|
||||
%sub = sub nsw i32 %a, %b
|
||||
@ -166,9 +161,8 @@ if.else:
|
||||
define i32 @func_l3(i32 %a, i32 %b) nounwind {
|
||||
; CHECK-LABEL: func_l3:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: subl %ecx, %eax
|
||||
; CHECK-NEXT: subl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: jge .LBB9_2
|
||||
; CHECK-NEXT: # %bb.1: # %if.then
|
||||
; CHECK-NEXT: retl
|
||||
@ -192,11 +186,10 @@ if.else:
|
||||
define i32 @func_l4(i32 %a, i32 %b) nounwind {
|
||||
; CHECK-LABEL: func_l4:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: xorl %edx, %edx
|
||||
; CHECK-NEXT: subl %ecx, %eax
|
||||
; CHECK-NEXT: cmovll %edx, %eax
|
||||
; CHECK-NEXT: xorl %ecx, %ecx
|
||||
; CHECK-NEXT: subl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: cmovll %ecx, %eax
|
||||
; CHECK-NEXT: retl
|
||||
%cmp = icmp sgt i32 %b, %a
|
||||
%sub = sub i32 %a, %b
|
||||
|
@ -1038,14 +1038,14 @@ define <8 x i16> @test16(<8 x i16> %x, <8 x i32> %y) nounwind {
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm4
|
||||
; SSE41-NEXT: pmaxud %xmm0, %xmm4
|
||||
; SSE41-NEXT: pcmpeqd %xmm1, %xmm4
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm4
|
||||
; SSE41-NEXT: pminud %xmm1, %xmm4
|
||||
; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
|
||||
; SSE41-NEXT: pcmpeqd %xmm5, %xmm5
|
||||
; SSE41-NEXT: pxor %xmm5, %xmm4
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm6
|
||||
; SSE41-NEXT: pmaxud %xmm3, %xmm6
|
||||
; SSE41-NEXT: pcmpeqd %xmm2, %xmm6
|
||||
; SSE41-NEXT: movdqa %xmm3, %xmm6
|
||||
; SSE41-NEXT: pminud %xmm2, %xmm6
|
||||
; SSE41-NEXT: pcmpeqd %xmm3, %xmm6
|
||||
; SSE41-NEXT: pxor %xmm5, %xmm6
|
||||
; SSE41-NEXT: packssdw %xmm6, %xmm4
|
||||
; SSE41-NEXT: psubd %xmm2, %xmm3
|
||||
@ -1062,15 +1062,15 @@ define <8 x i16> @test16(<8 x i16> %x, <8 x i32> %y) nounwind {
|
||||
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
|
||||
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX1-NEXT: vpmaxud %xmm0, %xmm1, %xmm3
|
||||
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm3
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
|
||||
; AVX1-NEXT: vpmaxud %xmm2, %xmm4, %xmm5
|
||||
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm4, %xmm5
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
|
||||
; AVX1-NEXT: vpminud %xmm3, %xmm2, %xmm4
|
||||
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm2, %xmm4
|
||||
; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm5
|
||||
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm0, %xmm5
|
||||
; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpandn %xmm0, %xmm3, %xmm0
|
||||
; AVX1-NEXT: vpsubd %xmm4, %xmm2, %xmm1
|
||||
; AVX1-NEXT: vpandn %xmm1, %xmm5, %xmm1
|
||||
; AVX1-NEXT: vpandn %xmm0, %xmm5, %xmm0
|
||||
; AVX1-NEXT: vpsubd %xmm3, %xmm2, %xmm1
|
||||
; AVX1-NEXT: vpandn %xmm1, %xmm4, %xmm1
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
|
||||
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
|
||||
@ -1081,8 +1081,8 @@ define <8 x i16> @test16(<8 x i16> %x, <8 x i32> %y) nounwind {
|
||||
; AVX2-LABEL: test16:
|
||||
; AVX2: # %bb.0: # %vector.ph
|
||||
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX2-NEXT: vpmaxud %ymm0, %ymm1, %ymm2
|
||||
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm2
|
||||
; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm2
|
||||
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm2
|
||||
; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpandn %ymm0, %ymm2, %ymm0
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
|
||||
@ -1094,7 +1094,7 @@ define <8 x i16> @test16(<8 x i16> %x, <8 x i32> %y) nounwind {
|
||||
; AVX512-LABEL: test16:
|
||||
; AVX512: # %bb.0: # %vector.ph
|
||||
; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512-NEXT: vpcmpltud %ymm0, %ymm1, %k1
|
||||
; AVX512-NEXT: vpcmpnleud %ymm1, %ymm0, %k1
|
||||
; AVX512-NEXT: vpsubd %ymm1, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpmovdw %ymm0, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: vzeroupper
|
||||
|
Loading…
Reference in New Issue
Block a user