mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[X86] Teach combineCVTP2I_CVTTP2I to handle STRICT_CVTTP2SI/STRICT_CVTTP2UI
This allows us to shrink a simple 128-bit load so that it can be folded into the v2f32->v2i64 vcvttps2qq/vcvttps2uqq conversions.
This commit is contained in:
parent
a619e90821
commit
6d167c498f
@ -44765,11 +44765,11 @@ static SDValue combineX86INT_TO_FP(SDNode *N, SelectionDAG &DAG,
|
||||
|
||||
static SDValue combineCVTP2I_CVTTP2I(SDNode *N, SelectionDAG &DAG,
|
||||
TargetLowering::DAGCombinerInfo &DCI) {
|
||||
// FIXME: Handle strict fp nodes.
|
||||
bool IsStrict = N->isTargetStrictFPOpcode();
|
||||
EVT VT = N->getValueType(0);
|
||||
|
||||
// Convert a full vector load into vzload when not all bits are needed.
|
||||
SDValue In = N->getOperand(0);
|
||||
SDValue In = N->getOperand(IsStrict ? 1 : 0);
|
||||
MVT InVT = In.getSimpleValueType();
|
||||
if (VT.getVectorNumElements() < InVT.getVectorNumElements() &&
|
||||
ISD::isNormalLoad(In.getNode()) && In.hasOneUse()) {
|
||||
@ -44780,9 +44780,16 @@ static SDValue combineCVTP2I_CVTTP2I(SDNode *N, SelectionDAG &DAG,
|
||||
MVT LoadVT = MVT::getVectorVT(MemVT, 128 / NumBits);
|
||||
if (SDValue VZLoad = narrowLoadToVZLoad(LN, MemVT, LoadVT, DAG)) {
|
||||
SDLoc dl(N);
|
||||
SDValue Convert = DAG.getNode(N->getOpcode(), dl, VT,
|
||||
DAG.getBitcast(InVT, VZLoad));
|
||||
DCI.CombineTo(N, Convert);
|
||||
if (IsStrict) {
|
||||
SDValue Convert =
|
||||
DAG.getNode(N->getOpcode(), dl, {VT, MVT::Other},
|
||||
{N->getOperand(0), DAG.getBitcast(InVT, VZLoad)});
|
||||
DCI.CombineTo(N, Convert, Convert.getValue(1));
|
||||
} else {
|
||||
SDValue Convert =
|
||||
DAG.getNode(N->getOpcode(), dl, VT, DAG.getBitcast(InVT, VZLoad));
|
||||
DCI.CombineTo(N, Convert);
|
||||
}
|
||||
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
|
||||
DCI.recursivelyDeleteUnusedNodes(LN);
|
||||
return SDValue(N, 0);
|
||||
@ -47991,8 +47998,11 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
||||
case X86ISD::CVTUI2P: return combineX86INT_TO_FP(N, DAG, DCI);
|
||||
case X86ISD::CVTP2SI:
|
||||
case X86ISD::CVTP2UI:
|
||||
case X86ISD::STRICT_CVTTP2SI:
|
||||
case X86ISD::CVTTP2SI:
|
||||
case X86ISD::CVTTP2UI: return combineCVTP2I_CVTTP2I(N, DAG, DCI);
|
||||
case X86ISD::STRICT_CVTTP2UI:
|
||||
case X86ISD::CVTTP2UI:
|
||||
return combineCVTP2I_CVTTP2I(N, DAG, DCI);
|
||||
case X86ISD::STRICT_CVTPH2PS:
|
||||
case X86ISD::CVTPH2PS: return combineCVTPH2PS(N, DAG, DCI);
|
||||
case X86ISD::BT: return combineBT(N, DAG, DCI);
|
||||
|
@ -1,16 +1,16 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=CHECK,SSE-32
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=CHECK,SSE-64
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-32
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-64
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX512F-32
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX512F-64
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-32
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-64
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512dq -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-32
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-64
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512dq,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VLDQ,AVX512VLDQ-32
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VLDQ,AVX512VLDQ-64
|
||||
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=CHECK,SSE-32
|
||||
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=CHECK,SSE-64
|
||||
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-32
|
||||
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-64
|
||||
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX512F-32
|
||||
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX512F-64
|
||||
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-32
|
||||
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-64
|
||||
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=avx512dq -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-32
|
||||
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512dq -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-64
|
||||
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=avx512dq,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VLDQ,AVX512VLDQ-32
|
||||
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512dq,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VLDQ,AVX512VLDQ-64
|
||||
|
||||
declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double>, metadata)
|
||||
declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double>, metadata)
|
||||
@ -703,6 +703,201 @@ define <2 x i64> @strict_vector_fptosi_v2f32_to_v2i64(<2 x float> %a) #0 {
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
define <2 x i64> @strict_vector_fptosi_v2f32_to_v2i64_load128(<4 x float>* %x) strictfp {
|
||||
; SSE-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
|
||||
; SSE-32: # %bb.0:
|
||||
; SSE-32-NEXT: pushl %ebp
|
||||
; SSE-32-NEXT: .cfi_def_cfa_offset 8
|
||||
; SSE-32-NEXT: .cfi_offset %ebp, -8
|
||||
; SSE-32-NEXT: movl %esp, %ebp
|
||||
; SSE-32-NEXT: .cfi_def_cfa_register %ebp
|
||||
; SSE-32-NEXT: andl $-8, %esp
|
||||
; SSE-32-NEXT: subl $24, %esp
|
||||
; SSE-32-NEXT: movl 8(%ebp), %eax
|
||||
; SSE-32-NEXT: movaps (%eax), %xmm0
|
||||
; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: wait
|
||||
; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
||||
; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00
|
||||
; SSE-32-NEXT: movw %ax, {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: wait
|
||||
; SSE-32-NEXT: fnstcw (%esp)
|
||||
; SSE-32-NEXT: movzwl (%esp), %eax
|
||||
; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00
|
||||
; SSE-32-NEXT: movw %ax, {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: fldcw (%esp)
|
||||
; SSE-32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; SSE-32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; SSE-32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; SSE-32-NEXT: movl %ebp, %esp
|
||||
; SSE-32-NEXT: popl %ebp
|
||||
; SSE-32-NEXT: .cfi_def_cfa %esp, 4
|
||||
; SSE-32-NEXT: retl
|
||||
;
|
||||
; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
|
||||
; SSE-64: # %bb.0:
|
||||
; SSE-64-NEXT: movaps (%rdi), %xmm1
|
||||
; SSE-64-NEXT: cvttss2si %xmm1, %rax
|
||||
; SSE-64-NEXT: movq %rax, %xmm0
|
||||
; SSE-64-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
|
||||
; SSE-64-NEXT: cvttss2si %xmm1, %rax
|
||||
; SSE-64-NEXT: movq %rax, %xmm1
|
||||
; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; SSE-64-NEXT: retq
|
||||
;
|
||||
; AVX-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
|
||||
; AVX-32: # %bb.0:
|
||||
; AVX-32-NEXT: pushl %ebp
|
||||
; AVX-32-NEXT: .cfi_def_cfa_offset 8
|
||||
; AVX-32-NEXT: .cfi_offset %ebp, -8
|
||||
; AVX-32-NEXT: movl %esp, %ebp
|
||||
; AVX-32-NEXT: .cfi_def_cfa_register %ebp
|
||||
; AVX-32-NEXT: andl $-8, %esp
|
||||
; AVX-32-NEXT: subl $16, %esp
|
||||
; AVX-32-NEXT: movl 8(%ebp), %eax
|
||||
; AVX-32-NEXT: vmovaps (%eax), %xmm0
|
||||
; AVX-32-NEXT: vmovss %xmm0, (%esp)
|
||||
; AVX-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
|
||||
; AVX-32-NEXT: flds (%esp)
|
||||
; AVX-32-NEXT: fisttpll (%esp)
|
||||
; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX-32-NEXT: wait
|
||||
; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
|
||||
; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
|
||||
; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
|
||||
; AVX-32-NEXT: movl %ebp, %esp
|
||||
; AVX-32-NEXT: popl %ebp
|
||||
; AVX-32-NEXT: .cfi_def_cfa %esp, 4
|
||||
; AVX-32-NEXT: retl
|
||||
;
|
||||
; AVX-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
|
||||
; AVX-64: # %bb.0:
|
||||
; AVX-64-NEXT: vcvttss2si 4(%rdi), %rax
|
||||
; AVX-64-NEXT: vmovq %rax, %xmm0
|
||||
; AVX-64-NEXT: vcvttss2si (%rdi), %rax
|
||||
; AVX-64-NEXT: vmovq %rax, %xmm1
|
||||
; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX-64-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
|
||||
; AVX512F-32: # %bb.0:
|
||||
; AVX512F-32-NEXT: pushl %ebp
|
||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
|
||||
; AVX512F-32-NEXT: .cfi_offset %ebp, -8
|
||||
; AVX512F-32-NEXT: movl %esp, %ebp
|
||||
; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
|
||||
; AVX512F-32-NEXT: andl $-8, %esp
|
||||
; AVX512F-32-NEXT: subl $16, %esp
|
||||
; AVX512F-32-NEXT: movl 8(%ebp), %eax
|
||||
; AVX512F-32-NEXT: vmovdqa (%eax), %xmm0
|
||||
; AVX512F-32-NEXT: vmovd %xmm0, (%esp)
|
||||
; AVX512F-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
|
||||
; AVX512F-32-NEXT: flds (%esp)
|
||||
; AVX512F-32-NEXT: fisttpll (%esp)
|
||||
; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX512F-32-NEXT: wait
|
||||
; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
|
||||
; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
|
||||
; AVX512F-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
|
||||
; AVX512F-32-NEXT: movl %ebp, %esp
|
||||
; AVX512F-32-NEXT: popl %ebp
|
||||
; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
|
||||
; AVX512F-32-NEXT: retl
|
||||
;
|
||||
; AVX512F-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
|
||||
; AVX512F-64: # %bb.0:
|
||||
; AVX512F-64-NEXT: vcvttss2si 4(%rdi), %rax
|
||||
; AVX512F-64-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512F-64-NEXT: vcvttss2si (%rdi), %rax
|
||||
; AVX512F-64-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512F-64-NEXT: retq
|
||||
;
|
||||
; AVX512VL-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
|
||||
; AVX512VL-32: # %bb.0:
|
||||
; AVX512VL-32-NEXT: pushl %ebp
|
||||
; AVX512VL-32-NEXT: .cfi_def_cfa_offset 8
|
||||
; AVX512VL-32-NEXT: .cfi_offset %ebp, -8
|
||||
; AVX512VL-32-NEXT: movl %esp, %ebp
|
||||
; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp
|
||||
; AVX512VL-32-NEXT: andl $-8, %esp
|
||||
; AVX512VL-32-NEXT: subl $16, %esp
|
||||
; AVX512VL-32-NEXT: movl 8(%ebp), %eax
|
||||
; AVX512VL-32-NEXT: vmovdqa (%eax), %xmm0
|
||||
; AVX512VL-32-NEXT: vmovd %xmm0, (%esp)
|
||||
; AVX512VL-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: flds (%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll (%esp)
|
||||
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: wait
|
||||
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
|
||||
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
|
||||
; AVX512VL-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
|
||||
; AVX512VL-32-NEXT: movl %ebp, %esp
|
||||
; AVX512VL-32-NEXT: popl %ebp
|
||||
; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4
|
||||
; AVX512VL-32-NEXT: retl
|
||||
;
|
||||
; AVX512VL-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
|
||||
; AVX512VL-64: # %bb.0:
|
||||
; AVX512VL-64-NEXT: vcvttss2si 4(%rdi), %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512VL-64-NEXT: vcvttss2si (%rdi), %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512VL-64-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
|
||||
; AVX512DQ-32: # %bb.0:
|
||||
; AVX512DQ-32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512DQ-32-NEXT: vmovdqa (%eax), %xmm0
|
||||
; AVX512DQ-32-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; AVX512DQ-32-NEXT: vcvttps2qq %ymm0, %zmm0
|
||||
; AVX512DQ-32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512DQ-32-NEXT: vzeroupper
|
||||
; AVX512DQ-32-NEXT: retl
|
||||
;
|
||||
; AVX512DQ-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
|
||||
; AVX512DQ-64: # %bb.0:
|
||||
; AVX512DQ-64-NEXT: vmovdqa (%rdi), %xmm0
|
||||
; AVX512DQ-64-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; AVX512DQ-64-NEXT: vcvttps2qq %ymm0, %zmm0
|
||||
; AVX512DQ-64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512DQ-64-NEXT: vzeroupper
|
||||
; AVX512DQ-64-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
|
||||
; AVX512VLDQ-32: # %bb.0:
|
||||
; AVX512VLDQ-32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512VLDQ-32-NEXT: vcvttps2qq (%eax), %xmm0
|
||||
; AVX512VLDQ-32-NEXT: retl
|
||||
;
|
||||
; AVX512VLDQ-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
|
||||
; AVX512VLDQ-64: # %bb.0:
|
||||
; AVX512VLDQ-64-NEXT: vcvttps2qq (%rdi), %xmm0
|
||||
; AVX512VLDQ-64-NEXT: retq
|
||||
%a = load <4 x float>, <4 x float>* %x
|
||||
%b = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
|
||||
%c = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32(<2 x float> %b, metadata !"fpexcept.strict") #0
|
||||
ret <2 x i64> %c
|
||||
}
|
||||
|
||||
define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
|
||||
; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
|
||||
; SSE-32: # %bb.0:
|
||||
@ -717,10 +912,10 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
|
||||
; SSE-32-NEXT: comiss %xmm2, %xmm0
|
||||
; SSE-32-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE-32-NEXT: xorps %xmm3, %xmm3
|
||||
; SSE-32-NEXT: jb .LBB3_2
|
||||
; SSE-32-NEXT: jb .LBB4_2
|
||||
; SSE-32-NEXT: # %bb.1:
|
||||
; SSE-32-NEXT: movaps %xmm2, %xmm3
|
||||
; SSE-32-NEXT: .LBB3_2:
|
||||
; SSE-32-NEXT: .LBB4_2:
|
||||
; SSE-32-NEXT: movaps %xmm0, %xmm4
|
||||
; SSE-32-NEXT: subss %xmm3, %xmm4
|
||||
; SSE-32-NEXT: movss %xmm4, {{[0-9]+}}(%esp)
|
||||
@ -736,10 +931,10 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
|
||||
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; SSE-32-NEXT: comiss %xmm2, %xmm0
|
||||
; SSE-32-NEXT: jb .LBB3_4
|
||||
; SSE-32-NEXT: jb .LBB4_4
|
||||
; SSE-32-NEXT: # %bb.3:
|
||||
; SSE-32-NEXT: movaps %xmm2, %xmm1
|
||||
; SSE-32-NEXT: .LBB3_4:
|
||||
; SSE-32-NEXT: .LBB4_4:
|
||||
; SSE-32-NEXT: subss %xmm1, %xmm0
|
||||
; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: setae %cl
|
||||
@ -776,10 +971,10 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
|
||||
; SSE-64-NEXT: comiss %xmm3, %xmm0
|
||||
; SSE-64-NEXT: xorps %xmm2, %xmm2
|
||||
; SSE-64-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE-64-NEXT: jb .LBB3_2
|
||||
; SSE-64-NEXT: jb .LBB4_2
|
||||
; SSE-64-NEXT: # %bb.1:
|
||||
; SSE-64-NEXT: movaps %xmm3, %xmm1
|
||||
; SSE-64-NEXT: .LBB3_2:
|
||||
; SSE-64-NEXT: .LBB4_2:
|
||||
; SSE-64-NEXT: movaps %xmm0, %xmm4
|
||||
; SSE-64-NEXT: subss %xmm1, %xmm4
|
||||
; SSE-64-NEXT: cvttss2si %xmm4, %rax
|
||||
@ -790,10 +985,10 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
|
||||
; SSE-64-NEXT: movq %rcx, %xmm1
|
||||
; SSE-64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; SSE-64-NEXT: comiss %xmm3, %xmm0
|
||||
; SSE-64-NEXT: jb .LBB3_4
|
||||
; SSE-64-NEXT: jb .LBB4_4
|
||||
; SSE-64-NEXT: # %bb.3:
|
||||
; SSE-64-NEXT: movaps %xmm3, %xmm2
|
||||
; SSE-64-NEXT: .LBB3_4:
|
||||
; SSE-64-NEXT: .LBB4_4:
|
||||
; SSE-64-NEXT: subss %xmm2, %xmm0
|
||||
; SSE-64-NEXT: cvttss2si %xmm0, %rax
|
||||
; SSE-64-NEXT: setae %cl
|
||||
@ -819,10 +1014,10 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
|
||||
; AVX-32-NEXT: vcomiss %xmm1, %xmm3
|
||||
; AVX-32-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
||||
; AVX-32-NEXT: vxorps %xmm4, %xmm4, %xmm4
|
||||
; AVX-32-NEXT: jb .LBB3_2
|
||||
; AVX-32-NEXT: jb .LBB4_2
|
||||
; AVX-32-NEXT: # %bb.1:
|
||||
; AVX-32-NEXT: vmovaps %xmm1, %xmm4
|
||||
; AVX-32-NEXT: .LBB3_2:
|
||||
; AVX-32-NEXT: .LBB4_2:
|
||||
; AVX-32-NEXT: vsubss %xmm4, %xmm3, %xmm3
|
||||
; AVX-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
|
||||
; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
|
||||
@ -833,10 +1028,10 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
|
||||
; AVX-32-NEXT: shll $31, %eax
|
||||
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
|
||||
; AVX-32-NEXT: vcomiss %xmm1, %xmm0
|
||||
; AVX-32-NEXT: jb .LBB3_4
|
||||
; AVX-32-NEXT: jb .LBB4_4
|
||||
; AVX-32-NEXT: # %bb.3:
|
||||
; AVX-32-NEXT: vmovaps %xmm1, %xmm2
|
||||
; AVX-32-NEXT: .LBB3_4:
|
||||
; AVX-32-NEXT: .LBB4_4:
|
||||
; AVX-32-NEXT: vsubss %xmm2, %xmm0, %xmm0
|
||||
; AVX-32-NEXT: vmovss %xmm0, (%esp)
|
||||
; AVX-32-NEXT: flds (%esp)
|
||||
@ -861,10 +1056,10 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
|
||||
; AVX-64-NEXT: vcomiss %xmm1, %xmm0
|
||||
; AVX-64-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
||||
; AVX-64-NEXT: vxorps %xmm3, %xmm3, %xmm3
|
||||
; AVX-64-NEXT: jb .LBB3_2
|
||||
; AVX-64-NEXT: jb .LBB4_2
|
||||
; AVX-64-NEXT: # %bb.1:
|
||||
; AVX-64-NEXT: vmovaps %xmm1, %xmm3
|
||||
; AVX-64-NEXT: .LBB3_2:
|
||||
; AVX-64-NEXT: .LBB4_2:
|
||||
; AVX-64-NEXT: vsubss %xmm3, %xmm0, %xmm3
|
||||
; AVX-64-NEXT: vcvttss2si %xmm3, %rax
|
||||
; AVX-64-NEXT: setae %cl
|
||||
@ -874,10 +1069,10 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
|
||||
; AVX-64-NEXT: vmovq %rcx, %xmm3
|
||||
; AVX-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
||||
; AVX-64-NEXT: vcomiss %xmm1, %xmm0
|
||||
; AVX-64-NEXT: jb .LBB3_4
|
||||
; AVX-64-NEXT: jb .LBB4_4
|
||||
; AVX-64-NEXT: # %bb.3:
|
||||
; AVX-64-NEXT: vmovaps %xmm1, %xmm2
|
||||
; AVX-64-NEXT: .LBB3_4:
|
||||
; AVX-64-NEXT: .LBB4_4:
|
||||
; AVX-64-NEXT: vsubss %xmm2, %xmm0, %xmm0
|
||||
; AVX-64-NEXT: vcvttss2si %xmm0, %rax
|
||||
; AVX-64-NEXT: setae %cl
|
||||
@ -1022,6 +1217,349 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64_load128(<4 x float>* %x) strictfp {
|
||||
; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
|
||||
; SSE-32: # %bb.0:
|
||||
; SSE-32-NEXT: pushl %ebp
|
||||
; SSE-32-NEXT: .cfi_def_cfa_offset 8
|
||||
; SSE-32-NEXT: .cfi_offset %ebp, -8
|
||||
; SSE-32-NEXT: movl %esp, %ebp
|
||||
; SSE-32-NEXT: .cfi_def_cfa_register %ebp
|
||||
; SSE-32-NEXT: andl $-8, %esp
|
||||
; SSE-32-NEXT: subl $24, %esp
|
||||
; SSE-32-NEXT: movl 8(%ebp), %eax
|
||||
; SSE-32-NEXT: movaps (%eax), %xmm0
|
||||
; SSE-32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; SSE-32-NEXT: comiss %xmm2, %xmm0
|
||||
; SSE-32-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE-32-NEXT: xorps %xmm3, %xmm3
|
||||
; SSE-32-NEXT: jb .LBB5_2
|
||||
; SSE-32-NEXT: # %bb.1:
|
||||
; SSE-32-NEXT: movaps %xmm2, %xmm3
|
||||
; SSE-32-NEXT: .LBB5_2:
|
||||
; SSE-32-NEXT: movaps %xmm0, %xmm4
|
||||
; SSE-32-NEXT: subss %xmm3, %xmm4
|
||||
; SSE-32-NEXT: movss %xmm4, {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: setae %al
|
||||
; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: wait
|
||||
; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
|
||||
; SSE-32-NEXT: orl $3072, %ecx # imm = 0xC00
|
||||
; SSE-32-NEXT: movw %cx, {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; SSE-32-NEXT: comiss %xmm2, %xmm0
|
||||
; SSE-32-NEXT: jb .LBB5_4
|
||||
; SSE-32-NEXT: # %bb.3:
|
||||
; SSE-32-NEXT: movaps %xmm2, %xmm1
|
||||
; SSE-32-NEXT: .LBB5_4:
|
||||
; SSE-32-NEXT: subss %xmm1, %xmm0
|
||||
; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: setae %cl
|
||||
; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: wait
|
||||
; SSE-32-NEXT: fnstcw (%esp)
|
||||
; SSE-32-NEXT: movzwl (%esp), %edx
|
||||
; SSE-32-NEXT: orl $3072, %edx # imm = 0xC00
|
||||
; SSE-32-NEXT: movw %dx, {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: fldcw (%esp)
|
||||
; SSE-32-NEXT: movzbl %al, %eax
|
||||
; SSE-32-NEXT: shll $31, %eax
|
||||
; SSE-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
|
||||
; SSE-32-NEXT: movd %eax, %xmm1
|
||||
; SSE-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSE-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; SSE-32-NEXT: movzbl %cl, %eax
|
||||
; SSE-32-NEXT: shll $31, %eax
|
||||
; SSE-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
|
||||
; SSE-32-NEXT: movd %eax, %xmm1
|
||||
; SSE-32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; SSE-32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
|
||||
; SSE-32-NEXT: movl %ebp, %esp
|
||||
; SSE-32-NEXT: popl %ebp
|
||||
; SSE-32-NEXT: .cfi_def_cfa %esp, 4
|
||||
; SSE-32-NEXT: retl
|
||||
;
|
||||
; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
|
||||
; SSE-64: # %bb.0:
|
||||
; SSE-64-NEXT: movaps (%rdi), %xmm1
|
||||
; SSE-64-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
|
||||
; SSE-64-NEXT: comiss %xmm3, %xmm1
|
||||
; SSE-64-NEXT: xorps %xmm2, %xmm2
|
||||
; SSE-64-NEXT: xorps %xmm0, %xmm0
|
||||
; SSE-64-NEXT: jb .LBB5_2
|
||||
; SSE-64-NEXT: # %bb.1:
|
||||
; SSE-64-NEXT: movaps %xmm3, %xmm0
|
||||
; SSE-64-NEXT: .LBB5_2:
|
||||
; SSE-64-NEXT: movaps %xmm1, %xmm4
|
||||
; SSE-64-NEXT: subss %xmm0, %xmm4
|
||||
; SSE-64-NEXT: cvttss2si %xmm4, %rax
|
||||
; SSE-64-NEXT: setae %cl
|
||||
; SSE-64-NEXT: movzbl %cl, %ecx
|
||||
; SSE-64-NEXT: shlq $63, %rcx
|
||||
; SSE-64-NEXT: xorq %rax, %rcx
|
||||
; SSE-64-NEXT: movq %rcx, %xmm0
|
||||
; SSE-64-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
|
||||
; SSE-64-NEXT: comiss %xmm3, %xmm1
|
||||
; SSE-64-NEXT: jb .LBB5_4
|
||||
; SSE-64-NEXT: # %bb.3:
|
||||
; SSE-64-NEXT: movaps %xmm3, %xmm2
|
||||
; SSE-64-NEXT: .LBB5_4:
|
||||
; SSE-64-NEXT: subss %xmm2, %xmm1
|
||||
; SSE-64-NEXT: cvttss2si %xmm1, %rax
|
||||
; SSE-64-NEXT: setae %cl
|
||||
; SSE-64-NEXT: movzbl %cl, %ecx
|
||||
; SSE-64-NEXT: shlq $63, %rcx
|
||||
; SSE-64-NEXT: xorq %rax, %rcx
|
||||
; SSE-64-NEXT: movq %rcx, %xmm1
|
||||
; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; SSE-64-NEXT: retq
|
||||
;
|
||||
; AVX-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
|
||||
; AVX-32: # %bb.0:
|
||||
; AVX-32-NEXT: pushl %ebp
|
||||
; AVX-32-NEXT: .cfi_def_cfa_offset 8
|
||||
; AVX-32-NEXT: .cfi_offset %ebp, -8
|
||||
; AVX-32-NEXT: movl %esp, %ebp
|
||||
; AVX-32-NEXT: .cfi_def_cfa_register %ebp
|
||||
; AVX-32-NEXT: andl $-8, %esp
|
||||
; AVX-32-NEXT: subl $16, %esp
|
||||
; AVX-32-NEXT: movl 8(%ebp), %eax
|
||||
; AVX-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-32-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
|
||||
; AVX-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX-32-NEXT: vcomiss %xmm1, %xmm3
|
||||
; AVX-32-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
||||
; AVX-32-NEXT: vxorps %xmm4, %xmm4, %xmm4
|
||||
; AVX-32-NEXT: jb .LBB5_2
|
||||
; AVX-32-NEXT: # %bb.1:
|
||||
; AVX-32-NEXT: vmovaps %xmm1, %xmm4
|
||||
; AVX-32-NEXT: .LBB5_2:
|
||||
; AVX-32-NEXT: vsubss %xmm4, %xmm3, %xmm3
|
||||
; AVX-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
|
||||
; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX-32-NEXT: wait
|
||||
; AVX-32-NEXT: setae %al
|
||||
; AVX-32-NEXT: movzbl %al, %eax
|
||||
; AVX-32-NEXT: shll $31, %eax
|
||||
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
|
||||
; AVX-32-NEXT: vcomiss %xmm1, %xmm0
|
||||
; AVX-32-NEXT: jb .LBB5_4
|
||||
; AVX-32-NEXT: # %bb.3:
|
||||
; AVX-32-NEXT: vmovaps %xmm1, %xmm2
|
||||
; AVX-32-NEXT: .LBB5_4:
|
||||
; AVX-32-NEXT: vsubss %xmm2, %xmm0, %xmm0
|
||||
; AVX-32-NEXT: vmovss %xmm0, (%esp)
|
||||
; AVX-32-NEXT: flds (%esp)
|
||||
; AVX-32-NEXT: fisttpll (%esp)
|
||||
; AVX-32-NEXT: wait
|
||||
; AVX-32-NEXT: setae %cl
|
||||
; AVX-32-NEXT: movzbl %cl, %ecx
|
||||
; AVX-32-NEXT: shll $31, %ecx
|
||||
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
|
||||
; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
|
||||
; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
|
||||
; AVX-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
|
||||
; AVX-32-NEXT: movl %ebp, %esp
|
||||
; AVX-32-NEXT: popl %ebp
|
||||
; AVX-32-NEXT: .cfi_def_cfa %esp, 4
|
||||
; AVX-32-NEXT: retl
|
||||
;
|
||||
; AVX-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
|
||||
; AVX-64: # %bb.0:
|
||||
; AVX-64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-64-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
|
||||
; AVX-64-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX-64-NEXT: vcomiss %xmm1, %xmm3
|
||||
; AVX-64-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
||||
; AVX-64-NEXT: vxorps %xmm4, %xmm4, %xmm4
|
||||
; AVX-64-NEXT: jb .LBB5_2
|
||||
; AVX-64-NEXT: # %bb.1:
|
||||
; AVX-64-NEXT: vmovaps %xmm1, %xmm4
|
||||
; AVX-64-NEXT: .LBB5_2:
|
||||
; AVX-64-NEXT: vsubss %xmm4, %xmm3, %xmm3
|
||||
; AVX-64-NEXT: vcvttss2si %xmm3, %rax
|
||||
; AVX-64-NEXT: setae %cl
|
||||
; AVX-64-NEXT: movzbl %cl, %ecx
|
||||
; AVX-64-NEXT: shlq $63, %rcx
|
||||
; AVX-64-NEXT: xorq %rax, %rcx
|
||||
; AVX-64-NEXT: vmovq %rcx, %xmm3
|
||||
; AVX-64-NEXT: vcomiss %xmm1, %xmm0
|
||||
; AVX-64-NEXT: jb .LBB5_4
|
||||
; AVX-64-NEXT: # %bb.3:
|
||||
; AVX-64-NEXT: vmovaps %xmm1, %xmm2
|
||||
; AVX-64-NEXT: .LBB5_4:
|
||||
; AVX-64-NEXT: vsubss %xmm2, %xmm0, %xmm0
|
||||
; AVX-64-NEXT: vcvttss2si %xmm0, %rax
|
||||
; AVX-64-NEXT: setae %cl
|
||||
; AVX-64-NEXT: movzbl %cl, %ecx
|
||||
; AVX-64-NEXT: shlq $63, %rcx
|
||||
; AVX-64-NEXT: xorq %rax, %rcx
|
||||
; AVX-64-NEXT: vmovq %rcx, %xmm0
|
||||
; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
|
||||
; AVX-64-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
|
||||
; AVX512F-32: # %bb.0:
|
||||
; AVX512F-32-NEXT: pushl %ebp
|
||||
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
|
||||
; AVX512F-32-NEXT: .cfi_offset %ebp, -8
|
||||
; AVX512F-32-NEXT: movl %esp, %ebp
|
||||
; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
|
||||
; AVX512F-32-NEXT: andl $-8, %esp
|
||||
; AVX512F-32-NEXT: subl $16, %esp
|
||||
; AVX512F-32-NEXT: movl 8(%ebp), %eax
|
||||
; AVX512F-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX512F-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX512F-32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; AVX512F-32-NEXT: xorl %eax, %eax
|
||||
; AVX512F-32-NEXT: vcomiss %xmm2, %xmm1
|
||||
; AVX512F-32-NEXT: setb %cl
|
||||
; AVX512F-32-NEXT: kmovw %ecx, %k1
|
||||
; AVX512F-32-NEXT: vxorps %xmm3, %xmm3, %xmm3
|
||||
; AVX512F-32-NEXT: vmovaps %xmm2, %xmm4
|
||||
; AVX512F-32-NEXT: vmovss %xmm3, %xmm4, %xmm4 {%k1}
|
||||
; AVX512F-32-NEXT: vsubss %xmm4, %xmm1, %xmm1
|
||||
; AVX512F-32-NEXT: vmovss %xmm1, {{[0-9]+}}(%esp)
|
||||
; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX512F-32-NEXT: wait
|
||||
; AVX512F-32-NEXT: setae %al
|
||||
; AVX512F-32-NEXT: shll $31, %eax
|
||||
; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512F-32-NEXT: xorl %ecx, %ecx
|
||||
; AVX512F-32-NEXT: vcomiss %xmm2, %xmm0
|
||||
; AVX512F-32-NEXT: setb %dl
|
||||
; AVX512F-32-NEXT: kmovw %edx, %k1
|
||||
; AVX512F-32-NEXT: vmovss %xmm3, %xmm2, %xmm2 {%k1}
|
||||
; AVX512F-32-NEXT: vsubss %xmm2, %xmm0, %xmm0
|
||||
; AVX512F-32-NEXT: vmovss %xmm0, (%esp)
|
||||
; AVX512F-32-NEXT: flds (%esp)
|
||||
; AVX512F-32-NEXT: fisttpll (%esp)
|
||||
; AVX512F-32-NEXT: wait
|
||||
; AVX512F-32-NEXT: setae %cl
|
||||
; AVX512F-32-NEXT: shll $31, %ecx
|
||||
; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
|
||||
; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX512F-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
|
||||
; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
|
||||
; AVX512F-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
|
||||
; AVX512F-32-NEXT: movl %ebp, %esp
|
||||
; AVX512F-32-NEXT: popl %ebp
|
||||
; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
|
||||
; AVX512F-32-NEXT: retl
|
||||
;
|
||||
; AVX512F-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
|
||||
; AVX512F-64: # %bb.0:
|
||||
; AVX512F-64-NEXT: vcvttss2usi 4(%rdi), %rax
|
||||
; AVX512F-64-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512F-64-NEXT: vcvttss2usi (%rdi), %rax
|
||||
; AVX512F-64-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512F-64-NEXT: retq
|
||||
;
|
||||
; AVX512VL-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
|
||||
; AVX512VL-32: # %bb.0:
|
||||
; AVX512VL-32-NEXT: pushl %ebp
|
||||
; AVX512VL-32-NEXT: .cfi_def_cfa_offset 8
|
||||
; AVX512VL-32-NEXT: .cfi_offset %ebp, -8
|
||||
; AVX512VL-32-NEXT: movl %esp, %ebp
|
||||
; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp
|
||||
; AVX512VL-32-NEXT: andl $-8, %esp
|
||||
; AVX512VL-32-NEXT: subl $16, %esp
|
||||
; AVX512VL-32-NEXT: movl 8(%ebp), %eax
|
||||
; AVX512VL-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX512VL-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX512VL-32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; AVX512VL-32-NEXT: xorl %eax, %eax
|
||||
; AVX512VL-32-NEXT: vcomiss %xmm2, %xmm1
|
||||
; AVX512VL-32-NEXT: setb %cl
|
||||
; AVX512VL-32-NEXT: kmovw %ecx, %k1
|
||||
; AVX512VL-32-NEXT: vxorps %xmm3, %xmm3, %xmm3
|
||||
; AVX512VL-32-NEXT: vmovaps %xmm2, %xmm4
|
||||
; AVX512VL-32-NEXT: vmovss %xmm3, %xmm4, %xmm4 {%k1}
|
||||
; AVX512VL-32-NEXT: vsubss %xmm4, %xmm1, %xmm1
|
||||
; AVX512VL-32-NEXT: vmovss %xmm1, {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: wait
|
||||
; AVX512VL-32-NEXT: setae %al
|
||||
; AVX512VL-32-NEXT: shll $31, %eax
|
||||
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512VL-32-NEXT: xorl %ecx, %ecx
|
||||
; AVX512VL-32-NEXT: vcomiss %xmm2, %xmm0
|
||||
; AVX512VL-32-NEXT: setb %dl
|
||||
; AVX512VL-32-NEXT: kmovw %edx, %k1
|
||||
; AVX512VL-32-NEXT: vmovss %xmm3, %xmm2, %xmm2 {%k1}
|
||||
; AVX512VL-32-NEXT: vsubss %xmm2, %xmm0, %xmm0
|
||||
; AVX512VL-32-NEXT: vmovss %xmm0, (%esp)
|
||||
; AVX512VL-32-NEXT: flds (%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll (%esp)
|
||||
; AVX512VL-32-NEXT: wait
|
||||
; AVX512VL-32-NEXT: setae %cl
|
||||
; AVX512VL-32-NEXT: shll $31, %ecx
|
||||
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
|
||||
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX512VL-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
|
||||
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
|
||||
; AVX512VL-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
|
||||
; AVX512VL-32-NEXT: movl %ebp, %esp
|
||||
; AVX512VL-32-NEXT: popl %ebp
|
||||
; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4
|
||||
; AVX512VL-32-NEXT: retl
|
||||
;
|
||||
; AVX512VL-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
|
||||
; AVX512VL-64: # %bb.0:
|
||||
; AVX512VL-64-NEXT: vcvttss2usi 4(%rdi), %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512VL-64-NEXT: vcvttss2usi (%rdi), %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512VL-64-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
|
||||
; AVX512DQ-32: # %bb.0:
|
||||
; AVX512DQ-32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512DQ-32-NEXT: vmovdqa (%eax), %xmm0
|
||||
; AVX512DQ-32-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; AVX512DQ-32-NEXT: vcvttps2uqq %ymm0, %zmm0
|
||||
; AVX512DQ-32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512DQ-32-NEXT: vzeroupper
|
||||
; AVX512DQ-32-NEXT: retl
|
||||
;
|
||||
; AVX512DQ-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
|
||||
; AVX512DQ-64: # %bb.0:
|
||||
; AVX512DQ-64-NEXT: vmovdqa (%rdi), %xmm0
|
||||
; AVX512DQ-64-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; AVX512DQ-64-NEXT: vcvttps2uqq %ymm0, %zmm0
|
||||
; AVX512DQ-64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512DQ-64-NEXT: vzeroupper
|
||||
; AVX512DQ-64-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
|
||||
; AVX512VLDQ-32: # %bb.0:
|
||||
; AVX512VLDQ-32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512VLDQ-32-NEXT: vcvttps2uqq (%eax), %xmm0
|
||||
; AVX512VLDQ-32-NEXT: retl
|
||||
;
|
||||
; AVX512VLDQ-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
|
||||
; AVX512VLDQ-64: # %bb.0:
|
||||
; AVX512VLDQ-64-NEXT: vcvttps2uqq (%rdi), %xmm0
|
||||
; AVX512VLDQ-64-NEXT: retq
|
||||
%a = load <4 x float>, <4 x float>* %x
|
||||
%b = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
|
||||
%c = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32(<2 x float> %b, metadata !"fpexcept.strict") #0
|
||||
ret <2 x i64> %c
|
||||
}
|
||||
|
||||
define <2 x i32> @strict_vector_fptosi_v2f64_to_v2i32(<2 x double> %a) #0 {
|
||||
; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i32:
|
||||
; SSE-32: # %bb.0:
|
||||
@ -1069,10 +1607,10 @@ define <2 x i32> @strict_vector_fptoui_v2f64_to_v2i32(<2 x double> %a) #0 {
|
||||
; SSE-32-NEXT: comisd %xmm3, %xmm0
|
||||
; SSE-32-NEXT: xorpd %xmm2, %xmm2
|
||||
; SSE-32-NEXT: xorpd %xmm1, %xmm1
|
||||
; SSE-32-NEXT: jb .LBB5_2
|
||||
; SSE-32-NEXT: jb .LBB7_2
|
||||
; SSE-32-NEXT: # %bb.1:
|
||||
; SSE-32-NEXT: movapd %xmm3, %xmm1
|
||||
; SSE-32-NEXT: .LBB5_2:
|
||||
; SSE-32-NEXT: .LBB7_2:
|
||||
; SSE-32-NEXT: setae %al
|
||||
; SSE-32-NEXT: movzbl %al, %eax
|
||||
; SSE-32-NEXT: shll $31, %eax
|
||||
@ -1083,10 +1621,10 @@ define <2 x i32> @strict_vector_fptoui_v2f64_to_v2i32(<2 x double> %a) #0 {
|
||||
; SSE-32-NEXT: movd %ecx, %xmm1
|
||||
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
|
||||
; SSE-32-NEXT: comisd %xmm3, %xmm0
|
||||
; SSE-32-NEXT: jb .LBB5_4
|
||||
; SSE-32-NEXT: jb .LBB7_4
|
||||
; SSE-32-NEXT: # %bb.3:
|
||||
; SSE-32-NEXT: movapd %xmm3, %xmm2
|
||||
; SSE-32-NEXT: .LBB5_4:
|
||||
; SSE-32-NEXT: .LBB7_4:
|
||||
; SSE-32-NEXT: setae %al
|
||||
; SSE-32-NEXT: movzbl %al, %eax
|
||||
; SSE-32-NEXT: shll $31, %eax
|
||||
@ -1225,10 +1763,10 @@ define <2 x i32> @strict_vector_fptoui_v2f32_to_v2i32(<2 x float> %a) #0 {
|
||||
; SSE-32-NEXT: comiss %xmm3, %xmm0
|
||||
; SSE-32-NEXT: xorps %xmm2, %xmm2
|
||||
; SSE-32-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE-32-NEXT: jb .LBB7_2
|
||||
; SSE-32-NEXT: jb .LBB9_2
|
||||
; SSE-32-NEXT: # %bb.1:
|
||||
; SSE-32-NEXT: movaps %xmm3, %xmm1
|
||||
; SSE-32-NEXT: .LBB7_2:
|
||||
; SSE-32-NEXT: .LBB9_2:
|
||||
; SSE-32-NEXT: setae %al
|
||||
; SSE-32-NEXT: movzbl %al, %eax
|
||||
; SSE-32-NEXT: shll $31, %eax
|
||||
@ -1239,10 +1777,10 @@ define <2 x i32> @strict_vector_fptoui_v2f32_to_v2i32(<2 x float> %a) #0 {
|
||||
; SSE-32-NEXT: movd %ecx, %xmm1
|
||||
; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; SSE-32-NEXT: comiss %xmm3, %xmm0
|
||||
; SSE-32-NEXT: jb .LBB7_4
|
||||
; SSE-32-NEXT: jb .LBB9_4
|
||||
; SSE-32-NEXT: # %bb.3:
|
||||
; SSE-32-NEXT: movaps %xmm3, %xmm2
|
||||
; SSE-32-NEXT: .LBB7_4:
|
||||
; SSE-32-NEXT: .LBB9_4:
|
||||
; SSE-32-NEXT: setae %al
|
||||
; SSE-32-NEXT: movzbl %al, %eax
|
||||
; SSE-32-NEXT: shll $31, %eax
|
||||
@ -1888,10 +2426,10 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
|
||||
; SSE-32-NEXT: comisd %xmm2, %xmm0
|
||||
; SSE-32-NEXT: xorpd %xmm1, %xmm1
|
||||
; SSE-32-NEXT: xorpd %xmm3, %xmm3
|
||||
; SSE-32-NEXT: jb .LBB17_2
|
||||
; SSE-32-NEXT: jb .LBB19_2
|
||||
; SSE-32-NEXT: # %bb.1:
|
||||
; SSE-32-NEXT: movapd %xmm2, %xmm3
|
||||
; SSE-32-NEXT: .LBB17_2:
|
||||
; SSE-32-NEXT: .LBB19_2:
|
||||
; SSE-32-NEXT: movapd %xmm0, %xmm4
|
||||
; SSE-32-NEXT: subsd %xmm3, %xmm4
|
||||
; SSE-32-NEXT: movsd %xmm4, {{[0-9]+}}(%esp)
|
||||
@ -1907,10 +2445,10 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
|
||||
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
|
||||
; SSE-32-NEXT: comisd %xmm2, %xmm0
|
||||
; SSE-32-NEXT: jb .LBB17_4
|
||||
; SSE-32-NEXT: jb .LBB19_4
|
||||
; SSE-32-NEXT: # %bb.3:
|
||||
; SSE-32-NEXT: movapd %xmm2, %xmm1
|
||||
; SSE-32-NEXT: .LBB17_4:
|
||||
; SSE-32-NEXT: .LBB19_4:
|
||||
; SSE-32-NEXT: subsd %xmm1, %xmm0
|
||||
; SSE-32-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: setae %cl
|
||||
@ -1947,10 +2485,10 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
|
||||
; SSE-64-NEXT: comisd %xmm3, %xmm0
|
||||
; SSE-64-NEXT: xorpd %xmm2, %xmm2
|
||||
; SSE-64-NEXT: xorpd %xmm1, %xmm1
|
||||
; SSE-64-NEXT: jb .LBB17_2
|
||||
; SSE-64-NEXT: jb .LBB19_2
|
||||
; SSE-64-NEXT: # %bb.1:
|
||||
; SSE-64-NEXT: movapd %xmm3, %xmm1
|
||||
; SSE-64-NEXT: .LBB17_2:
|
||||
; SSE-64-NEXT: .LBB19_2:
|
||||
; SSE-64-NEXT: movapd %xmm0, %xmm4
|
||||
; SSE-64-NEXT: subsd %xmm1, %xmm4
|
||||
; SSE-64-NEXT: cvttsd2si %xmm4, %rax
|
||||
@ -1961,10 +2499,10 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
|
||||
; SSE-64-NEXT: movq %rcx, %xmm1
|
||||
; SSE-64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
|
||||
; SSE-64-NEXT: comisd %xmm3, %xmm0
|
||||
; SSE-64-NEXT: jb .LBB17_4
|
||||
; SSE-64-NEXT: jb .LBB19_4
|
||||
; SSE-64-NEXT: # %bb.3:
|
||||
; SSE-64-NEXT: movapd %xmm3, %xmm2
|
||||
; SSE-64-NEXT: .LBB17_4:
|
||||
; SSE-64-NEXT: .LBB19_4:
|
||||
; SSE-64-NEXT: subsd %xmm2, %xmm0
|
||||
; SSE-64-NEXT: cvttsd2si %xmm0, %rax
|
||||
; SSE-64-NEXT: setae %cl
|
||||
@ -1990,10 +2528,10 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
|
||||
; AVX-32-NEXT: vcomisd %xmm1, %xmm3
|
||||
; AVX-32-NEXT: vxorpd %xmm2, %xmm2, %xmm2
|
||||
; AVX-32-NEXT: vxorpd %xmm4, %xmm4, %xmm4
|
||||
; AVX-32-NEXT: jb .LBB17_2
|
||||
; AVX-32-NEXT: jb .LBB19_2
|
||||
; AVX-32-NEXT: # %bb.1:
|
||||
; AVX-32-NEXT: vmovapd %xmm1, %xmm4
|
||||
; AVX-32-NEXT: .LBB17_2:
|
||||
; AVX-32-NEXT: .LBB19_2:
|
||||
; AVX-32-NEXT: vsubsd %xmm4, %xmm3, %xmm3
|
||||
; AVX-32-NEXT: vmovsd %xmm3, (%esp)
|
||||
; AVX-32-NEXT: fldl (%esp)
|
||||
@ -2004,10 +2542,10 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
|
||||
; AVX-32-NEXT: shll $31, %eax
|
||||
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
|
||||
; AVX-32-NEXT: vcomisd %xmm1, %xmm0
|
||||
; AVX-32-NEXT: jb .LBB17_4
|
||||
; AVX-32-NEXT: jb .LBB19_4
|
||||
; AVX-32-NEXT: # %bb.3:
|
||||
; AVX-32-NEXT: vmovapd %xmm1, %xmm2
|
||||
; AVX-32-NEXT: .LBB17_4:
|
||||
; AVX-32-NEXT: .LBB19_4:
|
||||
; AVX-32-NEXT: vsubsd %xmm2, %xmm0, %xmm0
|
||||
; AVX-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp)
|
||||
; AVX-32-NEXT: fldl {{[0-9]+}}(%esp)
|
||||
@ -2032,10 +2570,10 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
|
||||
; AVX-64-NEXT: vcomisd %xmm1, %xmm0
|
||||
; AVX-64-NEXT: vxorpd %xmm2, %xmm2, %xmm2
|
||||
; AVX-64-NEXT: vxorpd %xmm3, %xmm3, %xmm3
|
||||
; AVX-64-NEXT: jb .LBB17_2
|
||||
; AVX-64-NEXT: jb .LBB19_2
|
||||
; AVX-64-NEXT: # %bb.1:
|
||||
; AVX-64-NEXT: vmovapd %xmm1, %xmm3
|
||||
; AVX-64-NEXT: .LBB17_2:
|
||||
; AVX-64-NEXT: .LBB19_2:
|
||||
; AVX-64-NEXT: vsubsd %xmm3, %xmm0, %xmm3
|
||||
; AVX-64-NEXT: vcvttsd2si %xmm3, %rax
|
||||
; AVX-64-NEXT: setae %cl
|
||||
@ -2045,10 +2583,10 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
|
||||
; AVX-64-NEXT: vmovq %rcx, %xmm3
|
||||
; AVX-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
|
||||
; AVX-64-NEXT: vcomisd %xmm1, %xmm0
|
||||
; AVX-64-NEXT: jb .LBB17_4
|
||||
; AVX-64-NEXT: jb .LBB19_4
|
||||
; AVX-64-NEXT: # %bb.3:
|
||||
; AVX-64-NEXT: vmovapd %xmm1, %xmm2
|
||||
; AVX-64-NEXT: .LBB17_4:
|
||||
; AVX-64-NEXT: .LBB19_4:
|
||||
; AVX-64-NEXT: vsubsd %xmm2, %xmm0, %xmm0
|
||||
; AVX-64-NEXT: vcvttsd2si %xmm0, %rax
|
||||
; AVX-64-NEXT: setae %cl
|
||||
@ -2264,10 +2802,10 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
|
||||
; SSE-32-NEXT: comiss %xmm2, %xmm0
|
||||
; SSE-32-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE-32-NEXT: xorps %xmm3, %xmm3
|
||||
; SSE-32-NEXT: jb .LBB19_2
|
||||
; SSE-32-NEXT: jb .LBB21_2
|
||||
; SSE-32-NEXT: # %bb.1:
|
||||
; SSE-32-NEXT: movaps %xmm2, %xmm3
|
||||
; SSE-32-NEXT: .LBB19_2:
|
||||
; SSE-32-NEXT: .LBB21_2:
|
||||
; SSE-32-NEXT: movaps %xmm0, %xmm4
|
||||
; SSE-32-NEXT: subss %xmm3, %xmm4
|
||||
; SSE-32-NEXT: movss %xmm4, {{[0-9]+}}(%esp)
|
||||
@ -2283,10 +2821,10 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
|
||||
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; SSE-32-NEXT: comiss %xmm2, %xmm0
|
||||
; SSE-32-NEXT: jb .LBB19_4
|
||||
; SSE-32-NEXT: jb .LBB21_4
|
||||
; SSE-32-NEXT: # %bb.3:
|
||||
; SSE-32-NEXT: movaps %xmm2, %xmm1
|
||||
; SSE-32-NEXT: .LBB19_4:
|
||||
; SSE-32-NEXT: .LBB21_4:
|
||||
; SSE-32-NEXT: subss %xmm1, %xmm0
|
||||
; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
|
||||
; SSE-32-NEXT: setae %cl
|
||||
@ -2323,10 +2861,10 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
|
||||
; SSE-64-NEXT: comiss %xmm3, %xmm0
|
||||
; SSE-64-NEXT: xorps %xmm2, %xmm2
|
||||
; SSE-64-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE-64-NEXT: jb .LBB19_2
|
||||
; SSE-64-NEXT: jb .LBB21_2
|
||||
; SSE-64-NEXT: # %bb.1:
|
||||
; SSE-64-NEXT: movaps %xmm3, %xmm1
|
||||
; SSE-64-NEXT: .LBB19_2:
|
||||
; SSE-64-NEXT: .LBB21_2:
|
||||
; SSE-64-NEXT: movaps %xmm0, %xmm4
|
||||
; SSE-64-NEXT: subss %xmm1, %xmm4
|
||||
; SSE-64-NEXT: cvttss2si %xmm4, %rax
|
||||
@ -2337,10 +2875,10 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
|
||||
; SSE-64-NEXT: movq %rcx, %xmm1
|
||||
; SSE-64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; SSE-64-NEXT: comiss %xmm3, %xmm0
|
||||
; SSE-64-NEXT: jb .LBB19_4
|
||||
; SSE-64-NEXT: jb .LBB21_4
|
||||
; SSE-64-NEXT: # %bb.3:
|
||||
; SSE-64-NEXT: movaps %xmm3, %xmm2
|
||||
; SSE-64-NEXT: .LBB19_4:
|
||||
; SSE-64-NEXT: .LBB21_4:
|
||||
; SSE-64-NEXT: subss %xmm2, %xmm0
|
||||
; SSE-64-NEXT: cvttss2si %xmm0, %rax
|
||||
; SSE-64-NEXT: setae %cl
|
||||
@ -2366,10 +2904,10 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
|
||||
; AVX-32-NEXT: vcomiss %xmm1, %xmm3
|
||||
; AVX-32-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
||||
; AVX-32-NEXT: vxorps %xmm4, %xmm4, %xmm4
|
||||
; AVX-32-NEXT: jb .LBB19_2
|
||||
; AVX-32-NEXT: jb .LBB21_2
|
||||
; AVX-32-NEXT: # %bb.1:
|
||||
; AVX-32-NEXT: vmovaps %xmm1, %xmm4
|
||||
; AVX-32-NEXT: .LBB19_2:
|
||||
; AVX-32-NEXT: .LBB21_2:
|
||||
; AVX-32-NEXT: vsubss %xmm4, %xmm3, %xmm3
|
||||
; AVX-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
|
||||
; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
|
||||
@ -2380,10 +2918,10 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
|
||||
; AVX-32-NEXT: shll $31, %eax
|
||||
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
|
||||
; AVX-32-NEXT: vcomiss %xmm1, %xmm0
|
||||
; AVX-32-NEXT: jb .LBB19_4
|
||||
; AVX-32-NEXT: jb .LBB21_4
|
||||
; AVX-32-NEXT: # %bb.3:
|
||||
; AVX-32-NEXT: vmovaps %xmm1, %xmm2
|
||||
; AVX-32-NEXT: .LBB19_4:
|
||||
; AVX-32-NEXT: .LBB21_4:
|
||||
; AVX-32-NEXT: vsubss %xmm2, %xmm0, %xmm0
|
||||
; AVX-32-NEXT: vmovss %xmm0, (%esp)
|
||||
; AVX-32-NEXT: flds (%esp)
|
||||
@ -2408,10 +2946,10 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
|
||||
; AVX-64-NEXT: vcomiss %xmm1, %xmm0
|
||||
; AVX-64-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
||||
; AVX-64-NEXT: vxorps %xmm3, %xmm3, %xmm3
|
||||
; AVX-64-NEXT: jb .LBB19_2
|
||||
; AVX-64-NEXT: jb .LBB21_2
|
||||
; AVX-64-NEXT: # %bb.1:
|
||||
; AVX-64-NEXT: vmovaps %xmm1, %xmm3
|
||||
; AVX-64-NEXT: .LBB19_2:
|
||||
; AVX-64-NEXT: .LBB21_2:
|
||||
; AVX-64-NEXT: vsubss %xmm3, %xmm0, %xmm3
|
||||
; AVX-64-NEXT: vcvttss2si %xmm3, %rax
|
||||
; AVX-64-NEXT: setae %cl
|
||||
@ -2421,10 +2959,10 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
|
||||
; AVX-64-NEXT: vmovq %rcx, %xmm3
|
||||
; AVX-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
||||
; AVX-64-NEXT: vcomiss %xmm1, %xmm0
|
||||
; AVX-64-NEXT: jb .LBB19_4
|
||||
; AVX-64-NEXT: jb .LBB21_4
|
||||
; AVX-64-NEXT: # %bb.3:
|
||||
; AVX-64-NEXT: vmovaps %xmm1, %xmm2
|
||||
; AVX-64-NEXT: .LBB19_4:
|
||||
; AVX-64-NEXT: .LBB21_4:
|
||||
; AVX-64-NEXT: vsubss %xmm2, %xmm0, %xmm0
|
||||
; AVX-64-NEXT: vcvttss2si %xmm0, %rax
|
||||
; AVX-64-NEXT: setae %cl
|
||||
|
Loading…
Reference in New Issue
Block a user