1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 20:23:11 +01:00

[X86] Emit SBB instead of SETCC_CARRY from LowerSELECT. Break false dependency on the SBB input.

I'm hoping we can just replace SETCC_CARRY with SBB. This is another step towards that.

I've explicitly used zero as the input to the setcc to avoid a false dependency that we've had with the SETCC_CARRY. I changed one of the patterns that used NEG to instead use an explicit compare with 0 on the LHS. We needed the zero anyway to avoid the false dependency. The negate would clobber its input register. By using a CMP we can avoid that which could be useful.

Differential Revision: https://reviews.llvm.org/D55414

llvm-svn: 348959
This commit is contained in:
Craig Topper 2018-12-12 19:20:21 +00:00
parent 57e597b314
commit 0f93413ef8
7 changed files with 154 additions and 119 deletions

View File

@ -19802,22 +19802,21 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// (select (x == 0), 0, -1) -> neg & sbb
if (isNullConstant(Y) &&
(isAllOnesConstant(Op1) == (CondCode == X86::COND_NE))) {
SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32);
SDValue Zero = DAG.getConstant(0, DL, CmpOp0.getValueType());
SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs, Zero, CmpOp0);
SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
DAG.getConstant(X86::COND_B, DL, MVT::i8),
SDValue(Neg.getNode(), 1));
return Res;
SDValue Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Zero, CmpOp0);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
Zero = DAG.getConstant(0, DL, Op.getValueType());
return DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Cmp);
}
Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32,
CmpOp0, DAG.getConstant(1, DL, CmpOp0.getValueType()));
Cmp = ConvertCmpIfNecessary(Cmp, DAG);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
SDValue Zero = DAG.getConstant(0, DL, Op.getValueType());
SDValue Res = // Res = 0 or -1.
DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
DAG.getConstant(X86::COND_B, DL, MVT::i8), Cmp);
DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Cmp);
if (isAllOnesConstant(Op1) != (CondCode == X86::COND_E))
Res = DAG.getNOT(DL, Res, Res.getValueType());

View File

@ -362,6 +362,21 @@ def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1),
(SETBr)>;
// Patterns to give priority when both inputs are zero so that we don't use
// an immediate for the RHS.
// TODO: Should we use a 32-bit sbb for 8/16 to push the extract_subreg out?
def : Pat<(X86sbb_flag (i8 0), (i8 0), EFLAGS),
(SBB8rr (EXTRACT_SUBREG (MOV32r0), sub_8bit),
(EXTRACT_SUBREG (MOV32r0), sub_8bit))>;
def : Pat<(X86sbb_flag (i16 0), (i16 0), EFLAGS),
(SBB16rr (EXTRACT_SUBREG (MOV32r0), sub_16bit),
(EXTRACT_SUBREG (MOV32r0), sub_16bit))>;
def : Pat<(X86sbb_flag (i32 0), (i32 0), EFLAGS),
(SBB32rr (MOV32r0), (MOV32r0))>;
def : Pat<(X86sbb_flag (i64 0), (i64 0), EFLAGS),
(SBB64rr (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit),
(SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit))>;
//===----------------------------------------------------------------------===//
// String Pseudo Instructions
//

View File

@ -5,6 +5,7 @@ define void @test3(i32 %c, <64 x i1>* %ptr) {
; CHECK-LABEL: test3:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp)
; CHECK-NEXT: sbbl %ecx, %ecx
; CHECK-NEXT: kmovd %ecx, %k0

View File

@ -697,8 +697,8 @@ define i64 @test4(i64 %a, i64 %b) nounwind {
; ILP-NEXT: cmpq %rdi, %rsi
; ILP-NEXT: sbbq $0, %rdx
; ILP-NEXT: movl $0, %edx
; ILP-NEXT: sbbq $0, %rdx
; ILP-NEXT: sbbq $0, %rcx
; ILP-NEXT: sbbq %rdx, %rdx
; ILP-NEXT: sbbq %rcx, %rcx
; ILP-NEXT: setae %cl
; ILP-NEXT: movzbl %cl, %ecx
; ILP-NEXT: subq %rcx, %rax
@ -713,8 +713,8 @@ define i64 @test4(i64 %a, i64 %b) nounwind {
; HYBRID-NEXT: cmpq %rdi, %rsi
; HYBRID-NEXT: sbbq $0, %rcx
; HYBRID-NEXT: movl $0, %ecx
; HYBRID-NEXT: sbbq $0, %rcx
; HYBRID-NEXT: sbbq $0, %rax
; HYBRID-NEXT: sbbq %rcx, %rcx
; HYBRID-NEXT: sbbq %rax, %rax
; HYBRID-NEXT: setae %al
; HYBRID-NEXT: movzbl %al, %ecx
; HYBRID-NEXT: movl $2, %eax
@ -730,8 +730,8 @@ define i64 @test4(i64 %a, i64 %b) nounwind {
; BURR-NEXT: cmpq %rdi, %rsi
; BURR-NEXT: sbbq $0, %rcx
; BURR-NEXT: movl $0, %ecx
; BURR-NEXT: sbbq $0, %rcx
; BURR-NEXT: sbbq $0, %rax
; BURR-NEXT: sbbq %rcx, %rcx
; BURR-NEXT: sbbq %rax, %rax
; BURR-NEXT: setae %al
; BURR-NEXT: movzbl %al, %ecx
; BURR-NEXT: movl $2, %eax
@ -747,8 +747,8 @@ define i64 @test4(i64 %a, i64 %b) nounwind {
; SRC-NEXT: cmpq %rdi, %rsi
; SRC-NEXT: sbbq $0, %rax
; SRC-NEXT: movl $0, %eax
; SRC-NEXT: sbbq $0, %rax
; SRC-NEXT: sbbq $0, %rcx
; SRC-NEXT: sbbq %rax, %rax
; SRC-NEXT: sbbq %rcx, %rcx
; SRC-NEXT: setae %al
; SRC-NEXT: movzbl %al, %ecx
; SRC-NEXT: movl $2, %eax
@ -765,8 +765,8 @@ define i64 @test4(i64 %a, i64 %b) nounwind {
; LIN-NEXT: cmpq %rdi, %rsi
; LIN-NEXT: sbbq $0, %rdx
; LIN-NEXT: movl $0, %edx
; LIN-NEXT: sbbq $0, %rdx
; LIN-NEXT: sbbq $0, %rcx
; LIN-NEXT: sbbq %rdx, %rdx
; LIN-NEXT: sbbq %rcx, %rcx
; LIN-NEXT: setae %cl
; LIN-NEXT: movzbl %cl, %ecx
; LIN-NEXT: subq %rcx, %rax

View File

@ -624,21 +624,13 @@ define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2)
;; Test integer select between values and constants.
define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone {
; GENERIC-LABEL: test9:
; GENERIC: ## %bb.0:
; GENERIC-NEXT: cmpq $1, %rdi
; GENERIC-NEXT: sbbq %rax, %rax
; GENERIC-NEXT: orq %rsi, %rax
; GENERIC-NEXT: retq
;
; ATOM-LABEL: test9:
; ATOM: ## %bb.0:
; ATOM-NEXT: cmpq $1, %rdi
; ATOM-NEXT: sbbq %rax, %rax
; ATOM-NEXT: orq %rsi, %rax
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
; CHECK-LABEL: test9:
; CHECK: ## %bb.0:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpq $1, %rdi
; CHECK-NEXT: sbbq %rax, %rax
; CHECK-NEXT: orq %rsi, %rax
; CHECK-NEXT: retq
;
; ATHLON-LABEL: test9:
; ATHLON: ## %bb.0:
@ -672,21 +664,13 @@ define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone {
;; Same as test9
define i64 @test9a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
; GENERIC-LABEL: test9a:
; GENERIC: ## %bb.0:
; GENERIC-NEXT: cmpq $1, %rdi
; GENERIC-NEXT: sbbq %rax, %rax
; GENERIC-NEXT: orq %rsi, %rax
; GENERIC-NEXT: retq
;
; ATOM-LABEL: test9a:
; ATOM: ## %bb.0:
; ATOM-NEXT: cmpq $1, %rdi
; ATOM-NEXT: sbbq %rax, %rax
; ATOM-NEXT: orq %rsi, %rax
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
; CHECK-LABEL: test9a:
; CHECK: ## %bb.0:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpq $1, %rdi
; CHECK-NEXT: sbbq %rax, %rax
; CHECK-NEXT: orq %rsi, %rax
; CHECK-NEXT: retq
;
; ATHLON-LABEL: test9a:
; ATHLON: ## %bb.0:
@ -803,6 +787,7 @@ define i64 @test10(i64 %x, i64 %y) nounwind readnone ssp noredzone {
define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone {
; CHECK-LABEL: test11:
; CHECK: ## %bb.0:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpq $1, %rdi
; CHECK-NEXT: sbbq %rax, %rax
; CHECK-NEXT: notq %rax
@ -842,6 +827,7 @@ define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone {
define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
; CHECK-LABEL: test11a:
; CHECK: ## %bb.0:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpq $1, %rdi
; CHECK-NEXT: sbbq %rax, %rax
; CHECK-NEXT: notq %rax

View File

@ -15,6 +15,7 @@ define i32 @PR29058(i8 %x, i32 %y) {
; CHECK-NEXT: testb %dil, %dil
; CHECK-NEXT: movl $2147483646, %eax # imm = 0x7FFFFFFE
; CHECK-NEXT: cmovnel %esi, %eax
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: cmpb $1, %dil
; CHECK-NEXT: sbbb %dl, %dl
; CHECK-NEXT: orb %dl, %cl

View File

@ -50,8 +50,9 @@ define i64 @test_v4f64_sext(<4 x double> %a0, <4 x double> %a1) {
; AVX-LABEL: test_v4f64_sext:
; AVX: # %bb.0:
; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX-NEXT: vmovmskpd %ymm0, %eax
; AVX-NEXT: negl %eax
; AVX-NEXT: vmovmskpd %ymm0, %ecx
; AVX-NEXT: xorl %eax, %eax
; AVX-NEXT: cmpl %ecx, %eax
; AVX-NEXT: sbbq %rax, %rax
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
@ -83,9 +84,10 @@ define i64 @test_v4f64_legal_sext(<4 x double> %a0, <4 x double> %a1) {
; SSE-NEXT: cmpltpd %xmm0, %xmm2
; SSE-NEXT: packssdw %xmm3, %xmm2
; SSE-NEXT: movmskps %xmm2, %eax
; SSE-NEXT: negl %eax
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: cltq
; SSE-NEXT: xorl %ecx, %ecx
; SSE-NEXT: cmpl %eax, %ecx
; SSE-NEXT: sbbl %ecx, %ecx
; SSE-NEXT: movslq %ecx, %rax
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4f64_legal_sext:
@ -94,9 +96,10 @@ define i64 @test_v4f64_legal_sext(<4 x double> %a0, <4 x double> %a1) {
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovmskps %xmm0, %eax
; AVX-NEXT: negl %eax
; AVX-NEXT: sbbl %eax, %eax
; AVX-NEXT: cltq
; AVX-NEXT: xorl %ecx, %ecx
; AVX-NEXT: cmpl %eax, %ecx
; AVX-NEXT: sbbl %ecx, %ecx
; AVX-NEXT: movslq %ecx, %rax
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@ -128,16 +131,18 @@ define i32 @test_v4f32_sext(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_v4f32_sext:
; SSE: # %bb.0:
; SSE-NEXT: cmpltps %xmm0, %xmm1
; SSE-NEXT: movmskps %xmm1, %eax
; SSE-NEXT: negl %eax
; SSE-NEXT: movmskps %xmm1, %ecx
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: cmpl %ecx, %eax
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4f32_sext:
; AVX: # %bb.0:
; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovmskps %xmm0, %eax
; AVX-NEXT: negl %eax
; AVX-NEXT: vmovmskps %xmm0, %ecx
; AVX-NEXT: xorl %eax, %eax
; AVX-NEXT: cmpl %ecx, %eax
; AVX-NEXT: sbbl %eax, %eax
; AVX-NEXT: retq
;
@ -166,16 +171,18 @@ define i32 @test_v8f32_sext(<8 x float> %a0, <8 x float> %a1) {
; SSE-NEXT: cmpltps %xmm1, %xmm3
; SSE-NEXT: cmpltps %xmm0, %xmm2
; SSE-NEXT: orps %xmm3, %xmm2
; SSE-NEXT: movmskps %xmm2, %eax
; SSE-NEXT: negl %eax
; SSE-NEXT: movmskps %xmm2, %ecx
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: cmpl %ecx, %eax
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: retq
;
; AVX-LABEL: test_v8f32_sext:
; AVX: # %bb.0:
; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX-NEXT: vmovmskps %ymm0, %eax
; AVX-NEXT: negl %eax
; AVX-NEXT: vmovmskps %ymm0, %ecx
; AVX-NEXT: xorl %eax, %eax
; AVX-NEXT: cmpl %ecx, %eax
; AVX-NEXT: sbbl %eax, %eax
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
@ -210,8 +217,9 @@ define i32 @test_v8f32_legal_sext(<8 x float> %a0, <8 x float> %a1) {
; SSE-NEXT: cmpltps %xmm1, %xmm3
; SSE-NEXT: cmpltps %xmm0, %xmm2
; SSE-NEXT: packssdw %xmm3, %xmm2
; SSE-NEXT: pmovmskb %xmm2, %eax
; SSE-NEXT: negl %eax
; SSE-NEXT: pmovmskb %xmm2, %ecx
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: cmpl %ecx, %eax
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: retq
;
@ -220,8 +228,9 @@ define i32 @test_v8f32_legal_sext(<8 x float> %a0, <8 x float> %a1) {
; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpmovmskb %xmm0, %eax
; AVX-NEXT: negl %eax
; AVX-NEXT: vpmovmskb %xmm0, %ecx
; AVX-NEXT: xorl %eax, %eax
; AVX-NEXT: cmpl %ecx, %eax
; AVX-NEXT: sbbl %eax, %eax
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
@ -303,8 +312,9 @@ define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vmovmskpd %ymm0, %eax
; AVX1-NEXT: negl %eax
; AVX1-NEXT: vmovmskpd %ymm0, %ecx
; AVX1-NEXT: xorl %eax, %eax
; AVX1-NEXT: cmpl %ecx, %eax
; AVX1-NEXT: sbbq %rax, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -312,8 +322,9 @@ define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
; AVX2-LABEL: test_v4i64_sext:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vmovmskpd %ymm0, %eax
; AVX2-NEXT: negl %eax
; AVX2-NEXT: vmovmskpd %ymm0, %ecx
; AVX2-NEXT: xorl %eax, %eax
; AVX2-NEXT: cmpl %ecx, %eax
; AVX2-NEXT: sbbq %rax, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -345,9 +356,10 @@ define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
; SSE-NEXT: pcmpgtq %xmm2, %xmm0
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: movmskps %xmm0, %eax
; SSE-NEXT: negl %eax
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: cltq
; SSE-NEXT: xorl %ecx, %ecx
; SSE-NEXT: cmpl %eax, %ecx
; SSE-NEXT: sbbl %ecx, %ecx
; SSE-NEXT: movslq %ecx, %rax
; SSE-NEXT: retq
;
; AVX1-LABEL: test_v4i64_legal_sext:
@ -358,9 +370,10 @@ define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovmskps %xmm0, %eax
; AVX1-NEXT: negl %eax
; AVX1-NEXT: sbbl %eax, %eax
; AVX1-NEXT: cltq
; AVX1-NEXT: xorl %ecx, %ecx
; AVX1-NEXT: cmpl %eax, %ecx
; AVX1-NEXT: sbbl %ecx, %ecx
; AVX1-NEXT: movslq %ecx, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@ -370,9 +383,10 @@ define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovmskps %xmm0, %eax
; AVX2-NEXT: negl %eax
; AVX2-NEXT: sbbl %eax, %eax
; AVX2-NEXT: cltq
; AVX2-NEXT: xorl %ecx, %ecx
; AVX2-NEXT: cmpl %eax, %ecx
; AVX2-NEXT: sbbl %ecx, %ecx
; AVX2-NEXT: movslq %ecx, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -404,16 +418,18 @@ define i32 @test_v4i32_sext(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_v4i32_sext:
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtd %xmm1, %xmm0
; SSE-NEXT: movmskps %xmm0, %eax
; SSE-NEXT: negl %eax
; SSE-NEXT: movmskps %xmm0, %ecx
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: cmpl %ecx, %eax
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4i32_sext:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovmskps %xmm0, %eax
; AVX-NEXT: negl %eax
; AVX-NEXT: vmovmskps %xmm0, %ecx
; AVX-NEXT: xorl %eax, %eax
; AVX-NEXT: cmpl %ecx, %eax
; AVX-NEXT: sbbl %eax, %eax
; AVX-NEXT: retq
;
@ -442,8 +458,9 @@ define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) {
; SSE-NEXT: pcmpgtd %xmm3, %xmm1
; SSE-NEXT: pcmpgtd %xmm2, %xmm0
; SSE-NEXT: por %xmm1, %xmm0
; SSE-NEXT: movmskps %xmm0, %eax
; SSE-NEXT: negl %eax
; SSE-NEXT: movmskps %xmm0, %ecx
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: cmpl %ecx, %eax
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: retq
;
@ -454,8 +471,9 @@ define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) {
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vmovmskps %ymm0, %eax
; AVX1-NEXT: negl %eax
; AVX1-NEXT: vmovmskps %ymm0, %ecx
; AVX1-NEXT: xorl %eax, %eax
; AVX1-NEXT: cmpl %ecx, %eax
; AVX1-NEXT: sbbl %eax, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -463,8 +481,9 @@ define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) {
; AVX2-LABEL: test_v8i32_sext:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vmovmskps %ymm0, %eax
; AVX2-NEXT: negl %eax
; AVX2-NEXT: vmovmskps %ymm0, %ecx
; AVX2-NEXT: xorl %eax, %eax
; AVX2-NEXT: cmpl %ecx, %eax
; AVX2-NEXT: sbbl %eax, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -499,8 +518,9 @@ define i32 @test_v8i32_legal_sext(<8 x i32> %a0, <8 x i32> %a1) {
; SSE-NEXT: pcmpgtd %xmm3, %xmm1
; SSE-NEXT: pcmpgtd %xmm2, %xmm0
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
; SSE-NEXT: negl %eax
; SSE-NEXT: pmovmskb %xmm0, %ecx
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: cmpl %ecx, %eax
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: retq
;
@ -511,8 +531,9 @@ define i32 @test_v8i32_legal_sext(<8 x i32> %a0, <8 x i32> %a1) {
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: negl %eax
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
; AVX1-NEXT: xorl %eax, %eax
; AVX1-NEXT: cmpl %ecx, %eax
; AVX1-NEXT: sbbl %eax, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -522,8 +543,9 @@ define i32 @test_v8i32_legal_sext(<8 x i32> %a0, <8 x i32> %a1) {
; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: negl %eax
; AVX2-NEXT: vpmovmskb %xmm0, %ecx
; AVX2-NEXT: xorl %eax, %eax
; AVX2-NEXT: cmpl %ecx, %eax
; AVX2-NEXT: sbbl %eax, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -559,8 +581,9 @@ define i16 @test_v8i16_sext(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_v8i16_sext:
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtw %xmm1, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
; SSE-NEXT: negl %eax
; SSE-NEXT: pmovmskb %xmm0, %ecx
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: cmpl %ecx, %eax
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: # kill: def $ax killed $ax killed $eax
; SSE-NEXT: retq
@ -568,8 +591,9 @@ define i16 @test_v8i16_sext(<8 x i16> %a0, <8 x i16> %a1) {
; AVX-LABEL: test_v8i16_sext:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpmovmskb %xmm0, %eax
; AVX-NEXT: negl %eax
; AVX-NEXT: vpmovmskb %xmm0, %ecx
; AVX-NEXT: xorl %eax, %eax
; AVX-NEXT: cmpl %ecx, %eax
; AVX-NEXT: sbbl %eax, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
@ -604,8 +628,9 @@ define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) {
; SSE-NEXT: pcmpgtw %xmm3, %xmm1
; SSE-NEXT: pcmpgtw %xmm2, %xmm0
; SSE-NEXT: por %xmm1, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
; SSE-NEXT: negl %eax
; SSE-NEXT: pmovmskb %xmm0, %ecx
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: cmpl %ecx, %eax
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: # kill: def $ax killed $ax killed $eax
; SSE-NEXT: retq
@ -632,8 +657,9 @@ define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_v16i16_sext:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpmovmskb %ymm0, %eax
; AVX2-NEXT: negl %eax
; AVX2-NEXT: vpmovmskb %ymm0, %ecx
; AVX2-NEXT: xorl %eax, %eax
; AVX2-NEXT: cmpl %ecx, %eax
; AVX2-NEXT: sbbl %eax, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
@ -674,8 +700,9 @@ define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
; SSE-NEXT: pcmpgtw %xmm3, %xmm1
; SSE-NEXT: pcmpgtw %xmm2, %xmm0
; SSE-NEXT: packsswb %xmm1, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
; SSE-NEXT: negl %eax
; SSE-NEXT: pmovmskb %xmm0, %ecx
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: cmpl %ecx, %eax
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: # kill: def $ax killed $ax killed $eax
; SSE-NEXT: retq
@ -687,8 +714,9 @@ define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: negl %eax
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
; AVX1-NEXT: xorl %eax, %eax
; AVX1-NEXT: cmpl %ecx, %eax
; AVX1-NEXT: sbbl %eax, %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
@ -699,8 +727,9 @@ define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: negl %eax
; AVX2-NEXT: vpmovmskb %xmm0, %ecx
; AVX2-NEXT: xorl %eax, %eax
; AVX2-NEXT: cmpl %ecx, %eax
; AVX2-NEXT: sbbl %eax, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
@ -742,8 +771,9 @@ define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_v16i8_sext:
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtb %xmm1, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
; SSE-NEXT: negl %eax
; SSE-NEXT: pmovmskb %xmm0, %ecx
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: cmpl %ecx, %eax
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: # kill: def $al killed $al killed $eax
; SSE-NEXT: retq
@ -751,8 +781,9 @@ define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) {
; AVX-LABEL: test_v16i8_sext:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpmovmskb %xmm0, %eax
; AVX-NEXT: negl %eax
; AVX-NEXT: vpmovmskb %xmm0, %ecx
; AVX-NEXT: xorl %eax, %eax
; AVX-NEXT: cmpl %ecx, %eax
; AVX-NEXT: sbbl %eax, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
@ -791,8 +822,9 @@ define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
; SSE-NEXT: pcmpgtb %xmm3, %xmm1
; SSE-NEXT: pcmpgtb %xmm2, %xmm0
; SSE-NEXT: por %xmm1, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
; SSE-NEXT: negl %eax
; SSE-NEXT: pmovmskb %xmm0, %ecx
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: cmpl %ecx, %eax
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: # kill: def $al killed $al killed $eax
; SSE-NEXT: retq
@ -821,8 +853,9 @@ define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
; AVX2-LABEL: test_v32i8_sext:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpmovmskb %ymm0, %eax
; AVX2-NEXT: negl %eax
; AVX2-NEXT: vpmovmskb %ymm0, %ecx
; AVX2-NEXT: xorl %eax, %eax
; AVX2-NEXT: cmpl %ecx, %eax
; AVX2-NEXT: sbbl %eax, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper