mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 20:23:11 +01:00
[X86] Emit SBB instead of SETCC_CARRY from LowerSELECT. Break false dependency on the SBB input.
I'm hoping we can just replace SETCC_CARRY with SBB. This is another step towards that. I've explicitly used zero as both inputs to the SBB to avoid the false dependency that we've had with SETCC_CARRY. I changed one of the patterns that used NEG to instead use an explicit compare with 0 on the LHS. We needed the zero anyway to avoid the false dependency. The negate would clobber its input register. By using a CMP we can avoid that, which could be useful.
Differential Revision: https://reviews.llvm.org/D55414
llvm-svn: 348959
This commit is contained in:
parent
57e597b314
commit
0f93413ef8
@ -19802,22 +19802,21 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
|
||||
// (select (x == 0), 0, -1) -> neg & sbb
|
||||
if (isNullConstant(Y) &&
|
||||
(isAllOnesConstant(Op1) == (CondCode == X86::COND_NE))) {
|
||||
SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32);
|
||||
SDValue Zero = DAG.getConstant(0, DL, CmpOp0.getValueType());
|
||||
SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs, Zero, CmpOp0);
|
||||
SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
|
||||
DAG.getConstant(X86::COND_B, DL, MVT::i8),
|
||||
SDValue(Neg.getNode(), 1));
|
||||
return Res;
|
||||
SDValue Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Zero, CmpOp0);
|
||||
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
|
||||
Zero = DAG.getConstant(0, DL, Op.getValueType());
|
||||
return DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Cmp);
|
||||
}
|
||||
|
||||
Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32,
|
||||
CmpOp0, DAG.getConstant(1, DL, CmpOp0.getValueType()));
|
||||
Cmp = ConvertCmpIfNecessary(Cmp, DAG);
|
||||
|
||||
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
|
||||
SDValue Zero = DAG.getConstant(0, DL, Op.getValueType());
|
||||
SDValue Res = // Res = 0 or -1.
|
||||
DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
|
||||
DAG.getConstant(X86::COND_B, DL, MVT::i8), Cmp);
|
||||
DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Cmp);
|
||||
|
||||
if (isAllOnesConstant(Op1) != (CondCode == X86::COND_E))
|
||||
Res = DAG.getNOT(DL, Res, Res.getValueType());
|
||||
|
@ -362,6 +362,21 @@ def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
|
||||
def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1),
|
||||
(SETBr)>;
|
||||
|
||||
// Patterns to give priority when both inputs are zero so that we don't use
|
||||
// an immediate for the RHS.
|
||||
// Each pattern below matches X86sbb_flag with two constant-zero operands of
// one integer width and EFLAGS as the third operand, and selects the
// register-register SBB of that width with both operands built from MOV32r0.
// NOTE(review): MOV32r0 is presumably the 32-bit zeroing pseudo (the updated
// tests in this change show an xorl feeding the sbb) -- confirm.
// TODO: Should we use a 32-bit sbb for 8/16 to push the extract_subreg out?
|
||||
// i8: both SBB8rr operands are the sub_8bit subregister of a MOV32r0.
def : Pat<(X86sbb_flag (i8 0), (i8 0), EFLAGS),
|
||||
(SBB8rr (EXTRACT_SUBREG (MOV32r0), sub_8bit),
|
||||
(EXTRACT_SUBREG (MOV32r0), sub_8bit))>;
|
||||
// i16: both SBB16rr operands are the sub_16bit subregister of a MOV32r0.
def : Pat<(X86sbb_flag (i16 0), (i16 0), EFLAGS),
|
||||
(SBB16rr (EXTRACT_SUBREG (MOV32r0), sub_16bit),
|
||||
(EXTRACT_SUBREG (MOV32r0), sub_16bit))>;
|
||||
// i32: MOV32r0 is used directly for both SBB32rr operands.
def : Pat<(X86sbb_flag (i32 0), (i32 0), EFLAGS),
|
||||
(SBB32rr (MOV32r0), (MOV32r0))>;
|
||||
// i64: each SBB64rr operand is a MOV32r0 placed in sub_32bit via SUBREG_TO_REG.
def : Pat<(X86sbb_flag (i64 0), (i64 0), EFLAGS),
|
||||
(SBB64rr (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit),
|
||||
(SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit))>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// String Pseudo Instructions
|
||||
//
|
||||
|
@ -5,6 +5,7 @@ define void @test3(i32 %c, <64 x i1>* %ptr) {
|
||||
; CHECK-LABEL: test3:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: xorl %ecx, %ecx
|
||||
; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: sbbl %ecx, %ecx
|
||||
; CHECK-NEXT: kmovd %ecx, %k0
|
||||
|
@ -697,8 +697,8 @@ define i64 @test4(i64 %a, i64 %b) nounwind {
|
||||
; ILP-NEXT: cmpq %rdi, %rsi
|
||||
; ILP-NEXT: sbbq $0, %rdx
|
||||
; ILP-NEXT: movl $0, %edx
|
||||
; ILP-NEXT: sbbq $0, %rdx
|
||||
; ILP-NEXT: sbbq $0, %rcx
|
||||
; ILP-NEXT: sbbq %rdx, %rdx
|
||||
; ILP-NEXT: sbbq %rcx, %rcx
|
||||
; ILP-NEXT: setae %cl
|
||||
; ILP-NEXT: movzbl %cl, %ecx
|
||||
; ILP-NEXT: subq %rcx, %rax
|
||||
@ -713,8 +713,8 @@ define i64 @test4(i64 %a, i64 %b) nounwind {
|
||||
; HYBRID-NEXT: cmpq %rdi, %rsi
|
||||
; HYBRID-NEXT: sbbq $0, %rcx
|
||||
; HYBRID-NEXT: movl $0, %ecx
|
||||
; HYBRID-NEXT: sbbq $0, %rcx
|
||||
; HYBRID-NEXT: sbbq $0, %rax
|
||||
; HYBRID-NEXT: sbbq %rcx, %rcx
|
||||
; HYBRID-NEXT: sbbq %rax, %rax
|
||||
; HYBRID-NEXT: setae %al
|
||||
; HYBRID-NEXT: movzbl %al, %ecx
|
||||
; HYBRID-NEXT: movl $2, %eax
|
||||
@ -730,8 +730,8 @@ define i64 @test4(i64 %a, i64 %b) nounwind {
|
||||
; BURR-NEXT: cmpq %rdi, %rsi
|
||||
; BURR-NEXT: sbbq $0, %rcx
|
||||
; BURR-NEXT: movl $0, %ecx
|
||||
; BURR-NEXT: sbbq $0, %rcx
|
||||
; BURR-NEXT: sbbq $0, %rax
|
||||
; BURR-NEXT: sbbq %rcx, %rcx
|
||||
; BURR-NEXT: sbbq %rax, %rax
|
||||
; BURR-NEXT: setae %al
|
||||
; BURR-NEXT: movzbl %al, %ecx
|
||||
; BURR-NEXT: movl $2, %eax
|
||||
@ -747,8 +747,8 @@ define i64 @test4(i64 %a, i64 %b) nounwind {
|
||||
; SRC-NEXT: cmpq %rdi, %rsi
|
||||
; SRC-NEXT: sbbq $0, %rax
|
||||
; SRC-NEXT: movl $0, %eax
|
||||
; SRC-NEXT: sbbq $0, %rax
|
||||
; SRC-NEXT: sbbq $0, %rcx
|
||||
; SRC-NEXT: sbbq %rax, %rax
|
||||
; SRC-NEXT: sbbq %rcx, %rcx
|
||||
; SRC-NEXT: setae %al
|
||||
; SRC-NEXT: movzbl %al, %ecx
|
||||
; SRC-NEXT: movl $2, %eax
|
||||
@ -765,8 +765,8 @@ define i64 @test4(i64 %a, i64 %b) nounwind {
|
||||
; LIN-NEXT: cmpq %rdi, %rsi
|
||||
; LIN-NEXT: sbbq $0, %rdx
|
||||
; LIN-NEXT: movl $0, %edx
|
||||
; LIN-NEXT: sbbq $0, %rdx
|
||||
; LIN-NEXT: sbbq $0, %rcx
|
||||
; LIN-NEXT: sbbq %rdx, %rdx
|
||||
; LIN-NEXT: sbbq %rcx, %rcx
|
||||
; LIN-NEXT: setae %cl
|
||||
; LIN-NEXT: movzbl %cl, %ecx
|
||||
; LIN-NEXT: subq %rcx, %rax
|
||||
|
@ -624,21 +624,13 @@ define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2)
|
||||
;; Test integer select between values and constants.
|
||||
|
||||
define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone {
|
||||
; GENERIC-LABEL: test9:
|
||||
; GENERIC: ## %bb.0:
|
||||
; GENERIC-NEXT: cmpq $1, %rdi
|
||||
; GENERIC-NEXT: sbbq %rax, %rax
|
||||
; GENERIC-NEXT: orq %rsi, %rax
|
||||
; GENERIC-NEXT: retq
|
||||
;
|
||||
; ATOM-LABEL: test9:
|
||||
; ATOM: ## %bb.0:
|
||||
; ATOM-NEXT: cmpq $1, %rdi
|
||||
; ATOM-NEXT: sbbq %rax, %rax
|
||||
; ATOM-NEXT: orq %rsi, %rax
|
||||
; ATOM-NEXT: nop
|
||||
; ATOM-NEXT: nop
|
||||
; ATOM-NEXT: retq
|
||||
; CHECK-LABEL: test9:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: cmpq $1, %rdi
|
||||
; CHECK-NEXT: sbbq %rax, %rax
|
||||
; CHECK-NEXT: orq %rsi, %rax
|
||||
; CHECK-NEXT: retq
|
||||
;
|
||||
; ATHLON-LABEL: test9:
|
||||
; ATHLON: ## %bb.0:
|
||||
@ -672,21 +664,13 @@ define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone {
|
||||
|
||||
;; Same as test9
|
||||
define i64 @test9a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
|
||||
; GENERIC-LABEL: test9a:
|
||||
; GENERIC: ## %bb.0:
|
||||
; GENERIC-NEXT: cmpq $1, %rdi
|
||||
; GENERIC-NEXT: sbbq %rax, %rax
|
||||
; GENERIC-NEXT: orq %rsi, %rax
|
||||
; GENERIC-NEXT: retq
|
||||
;
|
||||
; ATOM-LABEL: test9a:
|
||||
; ATOM: ## %bb.0:
|
||||
; ATOM-NEXT: cmpq $1, %rdi
|
||||
; ATOM-NEXT: sbbq %rax, %rax
|
||||
; ATOM-NEXT: orq %rsi, %rax
|
||||
; ATOM-NEXT: nop
|
||||
; ATOM-NEXT: nop
|
||||
; ATOM-NEXT: retq
|
||||
; CHECK-LABEL: test9a:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: cmpq $1, %rdi
|
||||
; CHECK-NEXT: sbbq %rax, %rax
|
||||
; CHECK-NEXT: orq %rsi, %rax
|
||||
; CHECK-NEXT: retq
|
||||
;
|
||||
; ATHLON-LABEL: test9a:
|
||||
; ATHLON: ## %bb.0:
|
||||
@ -803,6 +787,7 @@ define i64 @test10(i64 %x, i64 %y) nounwind readnone ssp noredzone {
|
||||
define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone {
|
||||
; CHECK-LABEL: test11:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: cmpq $1, %rdi
|
||||
; CHECK-NEXT: sbbq %rax, %rax
|
||||
; CHECK-NEXT: notq %rax
|
||||
@ -842,6 +827,7 @@ define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone {
|
||||
define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
|
||||
; CHECK-LABEL: test11a:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: cmpq $1, %rdi
|
||||
; CHECK-NEXT: sbbq %rax, %rax
|
||||
; CHECK-NEXT: notq %rax
|
||||
|
@ -15,6 +15,7 @@ define i32 @PR29058(i8 %x, i32 %y) {
|
||||
; CHECK-NEXT: testb %dil, %dil
|
||||
; CHECK-NEXT: movl $2147483646, %eax # imm = 0x7FFFFFFE
|
||||
; CHECK-NEXT: cmovnel %esi, %eax
|
||||
; CHECK-NEXT: xorl %edx, %edx
|
||||
; CHECK-NEXT: cmpb $1, %dil
|
||||
; CHECK-NEXT: sbbb %dl, %dl
|
||||
; CHECK-NEXT: orb %dl, %cl
|
||||
|
@ -50,8 +50,9 @@ define i64 @test_v4f64_sext(<4 x double> %a0, <4 x double> %a1) {
|
||||
; AVX-LABEL: test_v4f64_sext:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
|
||||
; AVX-NEXT: vmovmskpd %ymm0, %eax
|
||||
; AVX-NEXT: negl %eax
|
||||
; AVX-NEXT: vmovmskpd %ymm0, %ecx
|
||||
; AVX-NEXT: xorl %eax, %eax
|
||||
; AVX-NEXT: cmpl %ecx, %eax
|
||||
; AVX-NEXT: sbbq %rax, %rax
|
||||
; AVX-NEXT: vzeroupper
|
||||
; AVX-NEXT: retq
|
||||
@ -83,9 +84,10 @@ define i64 @test_v4f64_legal_sext(<4 x double> %a0, <4 x double> %a1) {
|
||||
; SSE-NEXT: cmpltpd %xmm0, %xmm2
|
||||
; SSE-NEXT: packssdw %xmm3, %xmm2
|
||||
; SSE-NEXT: movmskps %xmm2, %eax
|
||||
; SSE-NEXT: negl %eax
|
||||
; SSE-NEXT: sbbl %eax, %eax
|
||||
; SSE-NEXT: cltq
|
||||
; SSE-NEXT: xorl %ecx, %ecx
|
||||
; SSE-NEXT: cmpl %eax, %ecx
|
||||
; SSE-NEXT: sbbl %ecx, %ecx
|
||||
; SSE-NEXT: movslq %ecx, %rax
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: test_v4f64_legal_sext:
|
||||
@ -94,9 +96,10 @@ define i64 @test_v4f64_legal_sext(<4 x double> %a0, <4 x double> %a1) {
|
||||
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovmskps %xmm0, %eax
|
||||
; AVX-NEXT: negl %eax
|
||||
; AVX-NEXT: sbbl %eax, %eax
|
||||
; AVX-NEXT: cltq
|
||||
; AVX-NEXT: xorl %ecx, %ecx
|
||||
; AVX-NEXT: cmpl %eax, %ecx
|
||||
; AVX-NEXT: sbbl %ecx, %ecx
|
||||
; AVX-NEXT: movslq %ecx, %rax
|
||||
; AVX-NEXT: vzeroupper
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
@ -128,16 +131,18 @@ define i32 @test_v4f32_sext(<4 x float> %a0, <4 x float> %a1) {
|
||||
; SSE-LABEL: test_v4f32_sext:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: cmpltps %xmm0, %xmm1
|
||||
; SSE-NEXT: movmskps %xmm1, %eax
|
||||
; SSE-NEXT: negl %eax
|
||||
; SSE-NEXT: movmskps %xmm1, %ecx
|
||||
; SSE-NEXT: xorl %eax, %eax
|
||||
; SSE-NEXT: cmpl %ecx, %eax
|
||||
; SSE-NEXT: sbbl %eax, %eax
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: test_v4f32_sext:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: vmovmskps %xmm0, %eax
|
||||
; AVX-NEXT: negl %eax
|
||||
; AVX-NEXT: vmovmskps %xmm0, %ecx
|
||||
; AVX-NEXT: xorl %eax, %eax
|
||||
; AVX-NEXT: cmpl %ecx, %eax
|
||||
; AVX-NEXT: sbbl %eax, %eax
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
@ -166,16 +171,18 @@ define i32 @test_v8f32_sext(<8 x float> %a0, <8 x float> %a1) {
|
||||
; SSE-NEXT: cmpltps %xmm1, %xmm3
|
||||
; SSE-NEXT: cmpltps %xmm0, %xmm2
|
||||
; SSE-NEXT: orps %xmm3, %xmm2
|
||||
; SSE-NEXT: movmskps %xmm2, %eax
|
||||
; SSE-NEXT: negl %eax
|
||||
; SSE-NEXT: movmskps %xmm2, %ecx
|
||||
; SSE-NEXT: xorl %eax, %eax
|
||||
; SSE-NEXT: cmpl %ecx, %eax
|
||||
; SSE-NEXT: sbbl %eax, %eax
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: test_v8f32_sext:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
|
||||
; AVX-NEXT: vmovmskps %ymm0, %eax
|
||||
; AVX-NEXT: negl %eax
|
||||
; AVX-NEXT: vmovmskps %ymm0, %ecx
|
||||
; AVX-NEXT: xorl %eax, %eax
|
||||
; AVX-NEXT: cmpl %ecx, %eax
|
||||
; AVX-NEXT: sbbl %eax, %eax
|
||||
; AVX-NEXT: vzeroupper
|
||||
; AVX-NEXT: retq
|
||||
@ -210,8 +217,9 @@ define i32 @test_v8f32_legal_sext(<8 x float> %a0, <8 x float> %a1) {
|
||||
; SSE-NEXT: cmpltps %xmm1, %xmm3
|
||||
; SSE-NEXT: cmpltps %xmm0, %xmm2
|
||||
; SSE-NEXT: packssdw %xmm3, %xmm2
|
||||
; SSE-NEXT: pmovmskb %xmm2, %eax
|
||||
; SSE-NEXT: negl %eax
|
||||
; SSE-NEXT: pmovmskb %xmm2, %ecx
|
||||
; SSE-NEXT: xorl %eax, %eax
|
||||
; SSE-NEXT: cmpl %ecx, %eax
|
||||
; SSE-NEXT: sbbl %eax, %eax
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
@ -220,8 +228,9 @@ define i32 @test_v8f32_legal_sext(<8 x float> %a0, <8 x float> %a1) {
|
||||
; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
|
||||
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpmovmskb %xmm0, %eax
|
||||
; AVX-NEXT: negl %eax
|
||||
; AVX-NEXT: vpmovmskb %xmm0, %ecx
|
||||
; AVX-NEXT: xorl %eax, %eax
|
||||
; AVX-NEXT: cmpl %ecx, %eax
|
||||
; AVX-NEXT: sbbl %eax, %eax
|
||||
; AVX-NEXT: vzeroupper
|
||||
; AVX-NEXT: retq
|
||||
@ -303,8 +312,9 @@ define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
|
||||
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vmovmskpd %ymm0, %eax
|
||||
; AVX1-NEXT: negl %eax
|
||||
; AVX1-NEXT: vmovmskpd %ymm0, %ecx
|
||||
; AVX1-NEXT: xorl %eax, %eax
|
||||
; AVX1-NEXT: cmpl %ecx, %eax
|
||||
; AVX1-NEXT: sbbq %rax, %rax
|
||||
; AVX1-NEXT: vzeroupper
|
||||
; AVX1-NEXT: retq
|
||||
@ -312,8 +322,9 @@ define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; AVX2-LABEL: test_v4i64_sext:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vmovmskpd %ymm0, %eax
|
||||
; AVX2-NEXT: negl %eax
|
||||
; AVX2-NEXT: vmovmskpd %ymm0, %ecx
|
||||
; AVX2-NEXT: xorl %eax, %eax
|
||||
; AVX2-NEXT: cmpl %ecx, %eax
|
||||
; AVX2-NEXT: sbbq %rax, %rax
|
||||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
@ -345,9 +356,10 @@ define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; SSE-NEXT: pcmpgtq %xmm2, %xmm0
|
||||
; SSE-NEXT: packssdw %xmm1, %xmm0
|
||||
; SSE-NEXT: movmskps %xmm0, %eax
|
||||
; SSE-NEXT: negl %eax
|
||||
; SSE-NEXT: sbbl %eax, %eax
|
||||
; SSE-NEXT: cltq
|
||||
; SSE-NEXT: xorl %ecx, %ecx
|
||||
; SSE-NEXT: cmpl %eax, %ecx
|
||||
; SSE-NEXT: sbbl %ecx, %ecx
|
||||
; SSE-NEXT: movslq %ecx, %rax
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: test_v4i64_legal_sext:
|
||||
@ -358,9 +370,10 @@ define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovmskps %xmm0, %eax
|
||||
; AVX1-NEXT: negl %eax
|
||||
; AVX1-NEXT: sbbl %eax, %eax
|
||||
; AVX1-NEXT: cltq
|
||||
; AVX1-NEXT: xorl %ecx, %ecx
|
||||
; AVX1-NEXT: cmpl %eax, %ecx
|
||||
; AVX1-NEXT: sbbl %ecx, %ecx
|
||||
; AVX1-NEXT: movslq %ecx, %rax
|
||||
; AVX1-NEXT: vzeroupper
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
@ -370,9 +383,10 @@ define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovmskps %xmm0, %eax
|
||||
; AVX2-NEXT: negl %eax
|
||||
; AVX2-NEXT: sbbl %eax, %eax
|
||||
; AVX2-NEXT: cltq
|
||||
; AVX2-NEXT: xorl %ecx, %ecx
|
||||
; AVX2-NEXT: cmpl %eax, %ecx
|
||||
; AVX2-NEXT: sbbl %ecx, %ecx
|
||||
; AVX2-NEXT: movslq %ecx, %rax
|
||||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
@ -404,16 +418,18 @@ define i32 @test_v4i32_sext(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
; SSE-LABEL: test_v4i32_sext:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: pcmpgtd %xmm1, %xmm0
|
||||
; SSE-NEXT: movmskps %xmm0, %eax
|
||||
; SSE-NEXT: negl %eax
|
||||
; SSE-NEXT: movmskps %xmm0, %ecx
|
||||
; SSE-NEXT: xorl %eax, %eax
|
||||
; SSE-NEXT: cmpl %ecx, %eax
|
||||
; SSE-NEXT: sbbl %eax, %eax
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: test_v4i32_sext:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovmskps %xmm0, %eax
|
||||
; AVX-NEXT: negl %eax
|
||||
; AVX-NEXT: vmovmskps %xmm0, %ecx
|
||||
; AVX-NEXT: xorl %eax, %eax
|
||||
; AVX-NEXT: cmpl %ecx, %eax
|
||||
; AVX-NEXT: sbbl %eax, %eax
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
@ -442,8 +458,9 @@ define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
; SSE-NEXT: pcmpgtd %xmm3, %xmm1
|
||||
; SSE-NEXT: pcmpgtd %xmm2, %xmm0
|
||||
; SSE-NEXT: por %xmm1, %xmm0
|
||||
; SSE-NEXT: movmskps %xmm0, %eax
|
||||
; SSE-NEXT: negl %eax
|
||||
; SSE-NEXT: movmskps %xmm0, %ecx
|
||||
; SSE-NEXT: xorl %eax, %eax
|
||||
; SSE-NEXT: cmpl %ecx, %eax
|
||||
; SSE-NEXT: sbbl %eax, %eax
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
@ -454,8 +471,9 @@ define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
|
||||
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vmovmskps %ymm0, %eax
|
||||
; AVX1-NEXT: negl %eax
|
||||
; AVX1-NEXT: vmovmskps %ymm0, %ecx
|
||||
; AVX1-NEXT: xorl %eax, %eax
|
||||
; AVX1-NEXT: cmpl %ecx, %eax
|
||||
; AVX1-NEXT: sbbl %eax, %eax
|
||||
; AVX1-NEXT: vzeroupper
|
||||
; AVX1-NEXT: retq
|
||||
@ -463,8 +481,9 @@ define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
; AVX2-LABEL: test_v8i32_sext:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
||||
; AVX2-NEXT: negl %eax
|
||||
; AVX2-NEXT: vmovmskps %ymm0, %ecx
|
||||
; AVX2-NEXT: xorl %eax, %eax
|
||||
; AVX2-NEXT: cmpl %ecx, %eax
|
||||
; AVX2-NEXT: sbbl %eax, %eax
|
||||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
@ -499,8 +518,9 @@ define i32 @test_v8i32_legal_sext(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
; SSE-NEXT: pcmpgtd %xmm3, %xmm1
|
||||
; SSE-NEXT: pcmpgtd %xmm2, %xmm0
|
||||
; SSE-NEXT: packssdw %xmm1, %xmm0
|
||||
; SSE-NEXT: pmovmskb %xmm0, %eax
|
||||
; SSE-NEXT: negl %eax
|
||||
; SSE-NEXT: pmovmskb %xmm0, %ecx
|
||||
; SSE-NEXT: xorl %eax, %eax
|
||||
; SSE-NEXT: cmpl %ecx, %eax
|
||||
; SSE-NEXT: sbbl %eax, %eax
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
@ -511,8 +531,9 @@ define i32 @test_v8i32_legal_sext(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
|
||||
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
||||
; AVX1-NEXT: negl %eax
|
||||
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
||||
; AVX1-NEXT: xorl %eax, %eax
|
||||
; AVX1-NEXT: cmpl %ecx, %eax
|
||||
; AVX1-NEXT: sbbl %eax, %eax
|
||||
; AVX1-NEXT: vzeroupper
|
||||
; AVX1-NEXT: retq
|
||||
@ -522,8 +543,9 @@ define i32 @test_v8i32_legal_sext(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpmovmskb %xmm0, %eax
|
||||
; AVX2-NEXT: negl %eax
|
||||
; AVX2-NEXT: vpmovmskb %xmm0, %ecx
|
||||
; AVX2-NEXT: xorl %eax, %eax
|
||||
; AVX2-NEXT: cmpl %ecx, %eax
|
||||
; AVX2-NEXT: sbbl %eax, %eax
|
||||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
@ -559,8 +581,9 @@ define i16 @test_v8i16_sext(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
; SSE-LABEL: test_v8i16_sext:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: pcmpgtw %xmm1, %xmm0
|
||||
; SSE-NEXT: pmovmskb %xmm0, %eax
|
||||
; SSE-NEXT: negl %eax
|
||||
; SSE-NEXT: pmovmskb %xmm0, %ecx
|
||||
; SSE-NEXT: xorl %eax, %eax
|
||||
; SSE-NEXT: cmpl %ecx, %eax
|
||||
; SSE-NEXT: sbbl %eax, %eax
|
||||
; SSE-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; SSE-NEXT: retq
|
||||
@ -568,8 +591,9 @@ define i16 @test_v8i16_sext(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
; AVX-LABEL: test_v8i16_sext:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpmovmskb %xmm0, %eax
|
||||
; AVX-NEXT: negl %eax
|
||||
; AVX-NEXT: vpmovmskb %xmm0, %ecx
|
||||
; AVX-NEXT: xorl %eax, %eax
|
||||
; AVX-NEXT: cmpl %ecx, %eax
|
||||
; AVX-NEXT: sbbl %eax, %eax
|
||||
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; AVX-NEXT: retq
|
||||
@ -604,8 +628,9 @@ define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) {
|
||||
; SSE-NEXT: pcmpgtw %xmm3, %xmm1
|
||||
; SSE-NEXT: pcmpgtw %xmm2, %xmm0
|
||||
; SSE-NEXT: por %xmm1, %xmm0
|
||||
; SSE-NEXT: pmovmskb %xmm0, %eax
|
||||
; SSE-NEXT: negl %eax
|
||||
; SSE-NEXT: pmovmskb %xmm0, %ecx
|
||||
; SSE-NEXT: xorl %eax, %eax
|
||||
; SSE-NEXT: cmpl %ecx, %eax
|
||||
; SSE-NEXT: sbbl %eax, %eax
|
||||
; SSE-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; SSE-NEXT: retq
|
||||
@ -632,8 +657,9 @@ define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) {
|
||||
; AVX2-LABEL: test_v16i16_sext:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpmovmskb %ymm0, %eax
|
||||
; AVX2-NEXT: negl %eax
|
||||
; AVX2-NEXT: vpmovmskb %ymm0, %ecx
|
||||
; AVX2-NEXT: xorl %eax, %eax
|
||||
; AVX2-NEXT: cmpl %ecx, %eax
|
||||
; AVX2-NEXT: sbbl %eax, %eax
|
||||
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; AVX2-NEXT: vzeroupper
|
||||
@ -674,8 +700,9 @@ define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
|
||||
; SSE-NEXT: pcmpgtw %xmm3, %xmm1
|
||||
; SSE-NEXT: pcmpgtw %xmm2, %xmm0
|
||||
; SSE-NEXT: packsswb %xmm1, %xmm0
|
||||
; SSE-NEXT: pmovmskb %xmm0, %eax
|
||||
; SSE-NEXT: negl %eax
|
||||
; SSE-NEXT: pmovmskb %xmm0, %ecx
|
||||
; SSE-NEXT: xorl %eax, %eax
|
||||
; SSE-NEXT: cmpl %ecx, %eax
|
||||
; SSE-NEXT: sbbl %eax, %eax
|
||||
; SSE-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; SSE-NEXT: retq
|
||||
@ -687,8 +714,9 @@ define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
|
||||
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
|
||||
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
||||
; AVX1-NEXT: negl %eax
|
||||
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
||||
; AVX1-NEXT: xorl %eax, %eax
|
||||
; AVX1-NEXT: cmpl %ecx, %eax
|
||||
; AVX1-NEXT: sbbl %eax, %eax
|
||||
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; AVX1-NEXT: vzeroupper
|
||||
@ -699,8 +727,9 @@ define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
|
||||
; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpmovmskb %xmm0, %eax
|
||||
; AVX2-NEXT: negl %eax
|
||||
; AVX2-NEXT: vpmovmskb %xmm0, %ecx
|
||||
; AVX2-NEXT: xorl %eax, %eax
|
||||
; AVX2-NEXT: cmpl %ecx, %eax
|
||||
; AVX2-NEXT: sbbl %eax, %eax
|
||||
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; AVX2-NEXT: vzeroupper
|
||||
@ -742,8 +771,9 @@ define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
; SSE-LABEL: test_v16i8_sext:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: pcmpgtb %xmm1, %xmm0
|
||||
; SSE-NEXT: pmovmskb %xmm0, %eax
|
||||
; SSE-NEXT: negl %eax
|
||||
; SSE-NEXT: pmovmskb %xmm0, %ecx
|
||||
; SSE-NEXT: xorl %eax, %eax
|
||||
; SSE-NEXT: cmpl %ecx, %eax
|
||||
; SSE-NEXT: sbbl %eax, %eax
|
||||
; SSE-NEXT: # kill: def $al killed $al killed $eax
|
||||
; SSE-NEXT: retq
|
||||
@ -751,8 +781,9 @@ define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
; AVX-LABEL: test_v16i8_sext:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpmovmskb %xmm0, %eax
|
||||
; AVX-NEXT: negl %eax
|
||||
; AVX-NEXT: vpmovmskb %xmm0, %ecx
|
||||
; AVX-NEXT: xorl %eax, %eax
|
||||
; AVX-NEXT: cmpl %ecx, %eax
|
||||
; AVX-NEXT: sbbl %eax, %eax
|
||||
; AVX-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX-NEXT: retq
|
||||
@ -791,8 +822,9 @@ define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
|
||||
; SSE-NEXT: pcmpgtb %xmm3, %xmm1
|
||||
; SSE-NEXT: pcmpgtb %xmm2, %xmm0
|
||||
; SSE-NEXT: por %xmm1, %xmm0
|
||||
; SSE-NEXT: pmovmskb %xmm0, %eax
|
||||
; SSE-NEXT: negl %eax
|
||||
; SSE-NEXT: pmovmskb %xmm0, %ecx
|
||||
; SSE-NEXT: xorl %eax, %eax
|
||||
; SSE-NEXT: cmpl %ecx, %eax
|
||||
; SSE-NEXT: sbbl %eax, %eax
|
||||
; SSE-NEXT: # kill: def $al killed $al killed $eax
|
||||
; SSE-NEXT: retq
|
||||
@ -821,8 +853,9 @@ define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
|
||||
; AVX2-LABEL: test_v32i8_sext:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpmovmskb %ymm0, %eax
|
||||
; AVX2-NEXT: negl %eax
|
||||
; AVX2-NEXT: vpmovmskb %ymm0, %ecx
|
||||
; AVX2-NEXT: xorl %eax, %eax
|
||||
; AVX2-NEXT: cmpl %ecx, %eax
|
||||
; AVX2-NEXT: sbbl %eax, %eax
|
||||
; AVX2-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX2-NEXT: vzeroupper
|
||||
|
Loading…
Reference in New Issue
Block a user