From 49530a6327595e16a8121fae92cf7047aafc9abe Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Fri, 22 Nov 2019 11:57:08 +0000
Subject: [PATCH] [X86] Update strict fp scalar tests and add fp80 tests for D68857

---
 test/CodeGen/X86/fp-strict-scalar.ll | 293 ++++++++++++++++++++-----
 test/CodeGen/X86/fp80-strict-scalar.ll | 220 +++++++++++++++++++
 2 files changed, 464 insertions(+), 49 deletions(-)
 create mode 100644 test/CodeGen/X86/fp80-strict-scalar.ll

diff --git a/test/CodeGen/X86/fp-strict-scalar.ll b/test/CodeGen/X86/fp-strict-scalar.ll
index dcbcc9377d1..724095e8aca 100644
--- a/test/CodeGen/X86/fp-strict-scalar.ll
+++ b/test/CodeGen/X86/fp-strict-scalar.ll
@@ -5,7 +5,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-X64
 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-X86
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-X64
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse -O3 | FileCheck %s --check-prefixes=X87
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse -O3 | FileCheck %s --check-prefixes=CHECK,X87
 
 declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
 declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
@@ -15,9 +15,13 @@ declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata,
 declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
 declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
 declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata)
+declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)
+declare float @llvm.experimental.constrained.fptrunc.f64.f32(double, metadata, metadata)
+declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
+declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata)
 
-define double @f1(double %a, double %b) nounwind strictfp {
-; SSE-X86-LABEL: f1:
+define double @fadd_f64(double %a, double %b) nounwind strictfp {
+; SSE-X86-LABEL: fadd_f64:
 ; SSE-X86: # %bb.0:
 ; SSE-X86-NEXT: pushl %ebp
 ; SSE-X86-NEXT: movl %esp, %ebp
@@ -31,12 +35,12 @@ define double @f1(double %a, double %b) nounwind strictfp {
 ; SSE-X86-NEXT: popl %ebp
 ; SSE-X86-NEXT: retl
 ;
-; SSE-X64-LABEL: f1:
+; SSE-X64-LABEL: fadd_f64:
 ; SSE-X64: # %bb.0:
 ; SSE-X64-NEXT: addsd %xmm1, %xmm0
 ; SSE-X64-NEXT: retq
 ;
-; AVX-X86-LABEL: f1:
+; AVX-X86-LABEL: fadd_f64:
 ; AVX-X86: # %bb.0:
 ; AVX-X86-NEXT: pushl %ebp
 ; AVX-X86-NEXT: movl %esp, %ebp
@@ -50,12 +54,12 @@ define double @f1(double %a, double %b) nounwind strictfp {
 ; AVX-X86-NEXT: popl %ebp
 ; AVX-X86-NEXT: retl
 ;
-; AVX-X64-LABEL: f1:
+; AVX-X64-LABEL: fadd_f64:
 ; AVX-X64: # %bb.0:
 ; AVX-X64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
 ; AVX-X64-NEXT: retq
 ;
-; X87-LABEL: f1:
+; X87-LABEL: fadd_f64:
 ; X87: # %bb.0:
 ; X87-NEXT: fldl {{[0-9]+}}(%esp)
 ; X87-NEXT: faddl {{[0-9]+}}(%esp)
@@ -66,8 +70,8 @@ define double @f1(double %a, double %b) nounwind strictfp {
 ret double %ret
 }
 
-define float @f2(float %a, float %b) nounwind strictfp {
-; SSE-X86-LABEL: f2:
+define float @fadd_f32(float %a, float %b) nounwind strictfp {
+; SSE-X86-LABEL: fadd_f32:
 ; SSE-X86: # %bb.0:
 ; SSE-X86-NEXT: pushl %eax
 ; SSE-X86-NEXT: movss {{.*#+}} xmm0 = 
mem[0],zero,zero,zero
@@ -77,12 +81,12 @@ define float @f2(float %a, float %b) nounwind strictfp {
 ; SSE-X86-NEXT: popl %eax
 ; SSE-X86-NEXT: retl
 ;
-; SSE-X64-LABEL: f2:
+; SSE-X64-LABEL: fadd_f32:
 ; SSE-X64: # %bb.0:
 ; SSE-X64-NEXT: addss %xmm1, %xmm0
 ; SSE-X64-NEXT: retq
 ;
-; AVX-X86-LABEL: f2:
+; AVX-X86-LABEL: fadd_f32:
 ; AVX-X86: # %bb.0:
 ; AVX-X86-NEXT: pushl %eax
 ; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
@@ -92,12 +96,12 @@ define float @f2(float %a, float %b) nounwind strictfp {
 ; AVX-X86-NEXT: popl %eax
 ; AVX-X86-NEXT: retl
 ;
-; AVX-X64-LABEL: f2:
+; AVX-X64-LABEL: fadd_f32:
 ; AVX-X64: # %bb.0:
 ; AVX-X64-NEXT: vaddss %xmm1, %xmm0, %xmm0
 ; AVX-X64-NEXT: retq
 ;
-; X87-LABEL: f2:
+; X87-LABEL: fadd_f32:
 ; X87: # %bb.0:
 ; X87-NEXT: flds {{[0-9]+}}(%esp)
 ; X87-NEXT: fadds {{[0-9]+}}(%esp)
@@ -108,8 +112,8 @@ define float @f2(float %a, float %b) nounwind strictfp {
 ret float %ret
 }
 
-define double @f3(double %a, double %b) nounwind strictfp {
-; SSE-X86-LABEL: f3:
+define double @fsub_f64(double %a, double %b) nounwind strictfp {
+; SSE-X86-LABEL: fsub_f64:
 ; SSE-X86: # %bb.0:
 ; SSE-X86-NEXT: pushl %ebp
 ; SSE-X86-NEXT: movl %esp, %ebp
@@ -123,12 +127,12 @@ define double @f3(double %a, double %b) nounwind strictfp {
 ; SSE-X86-NEXT: popl %ebp
 ; SSE-X86-NEXT: retl
 ;
-; SSE-X64-LABEL: f3:
+; SSE-X64-LABEL: fsub_f64:
 ; SSE-X64: # %bb.0:
 ; SSE-X64-NEXT: subsd %xmm1, %xmm0
 ; SSE-X64-NEXT: retq
 ;
-; AVX-X86-LABEL: f3:
+; AVX-X86-LABEL: fsub_f64:
 ; AVX-X86: # %bb.0:
 ; AVX-X86-NEXT: pushl %ebp
 ; AVX-X86-NEXT: movl %esp, %ebp
@@ -142,12 +146,12 @@ define double @f3(double %a, double %b) nounwind strictfp {
 ; AVX-X86-NEXT: popl %ebp
 ; AVX-X86-NEXT: retl
 ;
-; AVX-X64-LABEL: f3:
+; AVX-X64-LABEL: fsub_f64:
 ; AVX-X64: # %bb.0:
 ; AVX-X64-NEXT: vsubsd %xmm1, %xmm0, %xmm0
 ; AVX-X64-NEXT: retq
 ;
-; X87-LABEL: f3:
+; X87-LABEL: fsub_f64:
 ; X87: # %bb.0:
 ; X87-NEXT: fldl {{[0-9]+}}(%esp)
 ; X87-NEXT: fsubl {{[0-9]+}}(%esp)
@@ -158,8 +162,8 @@ define double @f3(double %a, double %b) nounwind strictfp {
 ret double %ret
 }
 
-define float @f4(float %a, float %b) nounwind strictfp {
-; SSE-X86-LABEL: f4:
+define float @fsub_f32(float %a, float %b) nounwind strictfp {
+; SSE-X86-LABEL: fsub_f32:
 ; SSE-X86: # %bb.0:
 ; SSE-X86-NEXT: pushl %eax
 ; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
@@ -169,12 +173,12 @@ define float @f4(float %a, float %b) nounwind strictfp {
 ; SSE-X86-NEXT: popl %eax
 ; SSE-X86-NEXT: retl
 ;
-; SSE-X64-LABEL: f4:
+; SSE-X64-LABEL: fsub_f32:
 ; SSE-X64: # %bb.0:
 ; SSE-X64-NEXT: subss %xmm1, %xmm0
 ; SSE-X64-NEXT: retq
 ;
-; AVX-X86-LABEL: f4:
+; AVX-X86-LABEL: fsub_f32:
 ; AVX-X86: # %bb.0:
 ; AVX-X86-NEXT: pushl %eax
 ; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
@@ -184,12 +188,12 @@ define float @f4(float %a, float %b) nounwind strictfp {
 ; AVX-X86-NEXT: popl %eax
 ; AVX-X86-NEXT: retl
 ;
-; AVX-X64-LABEL: f4:
+; AVX-X64-LABEL: fsub_f32:
 ; AVX-X64: # %bb.0:
 ; AVX-X64-NEXT: vsubss %xmm1, %xmm0, %xmm0
 ; AVX-X64-NEXT: retq
 ;
-; X87-LABEL: f4:
+; X87-LABEL: fsub_f32:
 ; X87: # %bb.0:
 ; X87-NEXT: flds {{[0-9]+}}(%esp)
 ; X87-NEXT: fsubs {{[0-9]+}}(%esp)
@@ -200,8 +204,8 @@ define float @f4(float %a, float %b) nounwind strictfp {
 ret float %ret
 }
 
-define double @f5(double %a, double %b) nounwind strictfp {
-; SSE-X86-LABEL: f5:
+define double @fmul_f64(double %a, double %b) nounwind strictfp {
+; SSE-X86-LABEL: fmul_f64:
 ; SSE-X86: # %bb.0:
 ; SSE-X86-NEXT: pushl %ebp
 ; SSE-X86-NEXT: movl %esp, %ebp
@@ -215,12 +219,12 @@ define double 
@f5(double %a, double %b) nounwind strictfp { ; SSE-X86-NEXT: popl %ebp ; SSE-X86-NEXT: retl ; -; SSE-X64-LABEL: f5: +; SSE-X64-LABEL: fmul_f64: ; SSE-X64: # %bb.0: ; SSE-X64-NEXT: mulsd %xmm1, %xmm0 ; SSE-X64-NEXT: retq ; -; AVX-X86-LABEL: f5: +; AVX-X86-LABEL: fmul_f64: ; AVX-X86: # %bb.0: ; AVX-X86-NEXT: pushl %ebp ; AVX-X86-NEXT: movl %esp, %ebp @@ -234,12 +238,12 @@ define double @f5(double %a, double %b) nounwind strictfp { ; AVX-X86-NEXT: popl %ebp ; AVX-X86-NEXT: retl ; -; AVX-X64-LABEL: f5: +; AVX-X64-LABEL: fmul_f64: ; AVX-X64: # %bb.0: ; AVX-X64-NEXT: vmulsd %xmm1, %xmm0, %xmm0 ; AVX-X64-NEXT: retq ; -; X87-LABEL: f5: +; X87-LABEL: fmul_f64: ; X87: # %bb.0: ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fmull {{[0-9]+}}(%esp) @@ -250,8 +254,8 @@ define double @f5(double %a, double %b) nounwind strictfp { ret double %ret } -define float @f6(float %a, float %b) nounwind strictfp { -; SSE-X86-LABEL: f6: +define float @fmul_f32(float %a, float %b) nounwind strictfp { +; SSE-X86-LABEL: fmul_f32: ; SSE-X86: # %bb.0: ; SSE-X86-NEXT: pushl %eax ; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero @@ -261,12 +265,12 @@ define float @f6(float %a, float %b) nounwind strictfp { ; SSE-X86-NEXT: popl %eax ; SSE-X86-NEXT: retl ; -; SSE-X64-LABEL: f6: +; SSE-X64-LABEL: fmul_f32: ; SSE-X64: # %bb.0: ; SSE-X64-NEXT: mulss %xmm1, %xmm0 ; SSE-X64-NEXT: retq ; -; AVX-X86-LABEL: f6: +; AVX-X86-LABEL: fmul_f32: ; AVX-X86: # %bb.0: ; AVX-X86-NEXT: pushl %eax ; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero @@ -276,12 +280,12 @@ define float @f6(float %a, float %b) nounwind strictfp { ; AVX-X86-NEXT: popl %eax ; AVX-X86-NEXT: retl ; -; AVX-X64-LABEL: f6: +; AVX-X64-LABEL: fmul_f32: ; AVX-X64: # %bb.0: ; AVX-X64-NEXT: vmulss %xmm1, %xmm0, %xmm0 ; AVX-X64-NEXT: retq ; -; X87-LABEL: f6: +; X87-LABEL: fmul_f32: ; X87: # %bb.0: ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fmuls {{[0-9]+}}(%esp) @@ -292,8 +296,8 @@ define float @f6(float %a, float %b) nounwind strictfp { ret float %ret } -define double @f7(double %a, double %b) nounwind strictfp { -; SSE-X86-LABEL: f7: +define double @fdiv_f64(double %a, double %b) nounwind strictfp { +; SSE-X86-LABEL: fdiv_f64: ; SSE-X86: # %bb.0: ; SSE-X86-NEXT: pushl %ebp ; SSE-X86-NEXT: movl %esp, %ebp @@ -307,12 +311,12 @@ define double @f7(double %a, double %b) nounwind strictfp { ; SSE-X86-NEXT: popl %ebp ; SSE-X86-NEXT: retl ; -; SSE-X64-LABEL: f7: +; SSE-X64-LABEL: fdiv_f64: ; SSE-X64: # %bb.0: ; SSE-X64-NEXT: divsd %xmm1, %xmm0 ; SSE-X64-NEXT: retq ; -; AVX-X86-LABEL: f7: +; AVX-X86-LABEL: fdiv_f64: ; AVX-X86: # %bb.0: ; AVX-X86-NEXT: pushl %ebp ; AVX-X86-NEXT: movl %esp, %ebp @@ -326,12 +330,12 @@ define double @f7(double %a, double %b) nounwind strictfp { ; AVX-X86-NEXT: popl %ebp ; AVX-X86-NEXT: retl ; -; AVX-X64-LABEL: f7: +; AVX-X64-LABEL: fdiv_f64: ; AVX-X64: # %bb.0: ; AVX-X64-NEXT: vdivsd %xmm1, %xmm0, %xmm0 ; AVX-X64-NEXT: retq ; -; X87-LABEL: f7: +; X87-LABEL: fdiv_f64: ; X87: # %bb.0: ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fdivl {{[0-9]+}}(%esp) @@ -342,8 +346,8 @@ define double @f7(double %a, double %b) nounwind strictfp { ret double %ret } -define float @f8(float %a, float %b) nounwind strictfp { -; SSE-X86-LABEL: f8: +define float @fdiv_f32(float %a, float %b) nounwind strictfp { +; SSE-X86-LABEL: fdiv_f32: ; SSE-X86: # %bb.0: ; SSE-X86-NEXT: pushl %eax ; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero @@ -353,12 +357,12 @@ define float @f8(float %a, float %b) nounwind strictfp { ; SSE-X86-NEXT: popl %eax ; 
SSE-X86-NEXT: retl
 ;
-; SSE-X64-LABEL: f8:
+; SSE-X64-LABEL: fdiv_f32:
 ; SSE-X64: # %bb.0:
 ; SSE-X64-NEXT: divss %xmm1, %xmm0
 ; SSE-X64-NEXT: retq
 ;
-; AVX-X86-LABEL: f8:
+; AVX-X86-LABEL: fdiv_f32:
 ; AVX-X86: # %bb.0:
 ; AVX-X86-NEXT: pushl %eax
 ; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
@@ -368,12 +372,12 @@ define float @f8(float %a, float %b) nounwind strictfp {
 ; AVX-X86-NEXT: popl %eax
 ; AVX-X86-NEXT: retl
 ;
-; AVX-X64-LABEL: f8:
+; AVX-X64-LABEL: fdiv_f32:
 ; AVX-X64: # %bb.0:
 ; AVX-X64-NEXT: vdivss %xmm1, %xmm0, %xmm0
 ; AVX-X64-NEXT: retq
 ;
-; X87-LABEL: f8:
+; X87-LABEL: fdiv_f32:
 ; X87: # %bb.0:
 ; X87-NEXT: flds {{[0-9]+}}(%esp)
 ; X87-NEXT: fdivs {{[0-9]+}}(%esp)
@@ -384,4 +388,195 @@ define float @f8(float %a, float %b) nounwind strictfp {
 ret float %ret
 }
 
+define void @fpext_f32_to_f64(float* %val, double* %ret) nounwind strictfp {
+; SSE-X86-LABEL: fpext_f32_to_f64:
+; SSE-X86: # %bb.0:
+; SSE-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-X86-NEXT: cvtss2sd %xmm0, %xmm0
+; SSE-X86-NEXT: movsd %xmm0, (%eax)
+; SSE-X86-NEXT: retl
+;
+; SSE-X64-LABEL: fpext_f32_to_f64:
+; SSE-X64: # %bb.0:
+; SSE-X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-X64-NEXT: cvtss2sd %xmm0, %xmm0
+; SSE-X64-NEXT: movsd %xmm0, (%rsi)
+; SSE-X64-NEXT: retq
+;
+; AVX-X86-LABEL: fpext_f32_to_f64:
+; AVX-X86: # %bb.0:
+; AVX-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-X86-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
+; AVX-X86-NEXT: vmovsd %xmm0, (%eax)
+; AVX-X86-NEXT: retl
+;
+; AVX-X64-LABEL: fpext_f32_to_f64:
+; AVX-X64: # %bb.0:
+; AVX-X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-X64-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
+; AVX-X64-NEXT: vmovsd %xmm0, (%rsi)
+; AVX-X64-NEXT: retq
+;
+; X87-LABEL: fpext_f32_to_f64:
+; X87: # %bb.0:
+; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X87-NEXT: flds (%ecx)
+; X87-NEXT: fstpl (%eax)
+; X87-NEXT: retl
+ %1 = load float, float* %val, align 4
+ %res = call double @llvm.experimental.constrained.fpext.f64.f32(float %1,
+ metadata !"fpexcept.strict") #0
+ store double %res, double* %ret, align 8
+ ret void
+}
+
+define void @fptrunc_f64_to_f32(double* %val, float *%ret) nounwind strictfp {
+; SSE-X86-LABEL: fptrunc_f64_to_f32:
+; SSE-X86: # %bb.0:
+; SSE-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE-X86-NEXT: cvtsd2ss %xmm0, %xmm0
+; SSE-X86-NEXT: movss %xmm0, (%eax)
+; SSE-X86-NEXT: retl
+;
+; SSE-X64-LABEL: fptrunc_f64_to_f32:
+; SSE-X64: # %bb.0:
+; SSE-X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE-X64-NEXT: cvtsd2ss %xmm0, %xmm0
+; SSE-X64-NEXT: movss %xmm0, (%rsi)
+; SSE-X64-NEXT: retq
+;
+; AVX-X86-LABEL: fptrunc_f64_to_f32:
+; AVX-X86: # %bb.0:
+; AVX-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-X86-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
+; AVX-X86-NEXT: vmovss %xmm0, (%eax)
+; AVX-X86-NEXT: retl
+;
+; AVX-X64-LABEL: fptrunc_f64_to_f32:
+; AVX-X64: # %bb.0:
+; AVX-X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-X64-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
+; AVX-X64-NEXT: vmovss %xmm0, (%rsi)
+; AVX-X64-NEXT: retq
+;
+; X87-LABEL: fptrunc_f64_to_f32:
+; X87: # %bb.0:
+; 
X87-NEXT: pushl %eax +; X87-NEXT: movl {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X87-NEXT: fldl (%ecx) +; X87-NEXT: fstps (%esp) +; X87-NEXT: flds (%esp) +; X87-NEXT: fstps (%eax) +; X87-NEXT: popl %eax +; X87-NEXT: retl + %1 = load double, double* %val, align 8 + %res = call float @llvm.experimental.constrained.fptrunc.f64.f32(double %1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + store float %res, float* %ret, align 4 + ret void +} + +define void @fsqrt_f64(double* %a) nounwind strictfp { +; SSE-X86-LABEL: fsqrt_f64: +; SSE-X86: # %bb.0: +; SSE-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE-X86-NEXT: sqrtsd %xmm0, %xmm0 +; SSE-X86-NEXT: movsd %xmm0, (%eax) +; SSE-X86-NEXT: retl +; +; SSE-X64-LABEL: fsqrt_f64: +; SSE-X64: # %bb.0: +; SSE-X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE-X64-NEXT: sqrtsd %xmm0, %xmm0 +; SSE-X64-NEXT: movsd %xmm0, (%rdi) +; SSE-X64-NEXT: retq +; +; AVX-X86-LABEL: fsqrt_f64: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-X86-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 +; AVX-X86-NEXT: vmovsd %xmm0, (%eax) +; AVX-X86-NEXT: retl +; +; AVX-X64-LABEL: fsqrt_f64: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-X64-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 +; AVX-X64-NEXT: vmovsd %xmm0, (%rdi) +; AVX-X64-NEXT: retq +; +; X87-LABEL: fsqrt_f64: +; X87: # %bb.0: +; X87-NEXT: movl {{[0-9]+}}(%esp), %eax +; X87-NEXT: fldl (%eax) +; X87-NEXT: fsqrt +; X87-NEXT: fstpl (%eax) +; X87-NEXT: retl + %1 = load double, double* %a, align 8 + %res = call double @llvm.experimental.constrained.sqrt.f64(double %1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + store double %res, double* %a, align 8 + ret void +} + +define void @fsqrt_f32(float* %a) nounwind strictfp { +; SSE-X86-LABEL: fsqrt_f32: +; SSE-X86: # %bb.0: +; SSE-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE-X86-NEXT: sqrtss %xmm0, %xmm0 +; SSE-X86-NEXT: movss %xmm0, (%eax) +; SSE-X86-NEXT: retl +; +; SSE-X64-LABEL: fsqrt_f32: +; SSE-X64: # %bb.0: +; SSE-X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE-X64-NEXT: sqrtss %xmm0, %xmm0 +; SSE-X64-NEXT: movss %xmm0, (%rdi) +; SSE-X64-NEXT: retq +; +; AVX-X86-LABEL: fsqrt_f32: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-X86-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 +; AVX-X86-NEXT: vmovss %xmm0, (%eax) +; AVX-X86-NEXT: retl +; +; AVX-X64-LABEL: fsqrt_f32: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-X64-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 +; AVX-X64-NEXT: vmovss %xmm0, (%rdi) +; AVX-X64-NEXT: retq +; +; X87-LABEL: fsqrt_f32: +; X87: # %bb.0: +; X87-NEXT: movl {{[0-9]+}}(%esp), %eax +; X87-NEXT: flds (%eax) +; X87-NEXT: fsqrt +; X87-NEXT: fstps (%eax) +; X87-NEXT: retl + %1 = load float, float* %a, align 4 + %res = call float @llvm.experimental.constrained.sqrt.f32(float %1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + store float %res, float* %a, align 4 + ret void +} + attributes #0 = { strictfp } diff --git a/test/CodeGen/X86/fp80-strict-scalar.ll b/test/CodeGen/X86/fp80-strict-scalar.ll new file mode 100644 index 00000000000..1fc5d019619 --- /dev/null +++ b/test/CodeGen/X86/fp80-strict-scalar.ll @@ -0,0 +1,220 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse -O3 | FileCheck %s --check-prefixes=CHECK,X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse -O3 | FileCheck %s --check-prefixes=CHECK,X64 + +declare x86_fp80 @llvm.experimental.constrained.fadd.x86_fp80(x86_fp80, x86_fp80, metadata, metadata) +declare x86_fp80 @llvm.experimental.constrained.fsub.x86_fp80(x86_fp80, x86_fp80, metadata, metadata) +declare x86_fp80 @llvm.experimental.constrained.fmul.x86_fp80(x86_fp80, x86_fp80, metadata, metadata) +declare x86_fp80 @llvm.experimental.constrained.fdiv.x86_fp80(x86_fp80, x86_fp80, metadata, metadata) +declare x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f32(float, metadata) +declare x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f64(double, metadata) +declare x86_fp80 @llvm.experimental.constrained.sqrt.x86_fp80(x86_fp80, metadata, metadata) +declare float @llvm.experimental.constrained.fptrunc.x86_fp80.f32(x86_fp80, metadata, metadata) +declare double @llvm.experimental.constrained.fptrunc.x86_fp80.f64(x86_fp80, metadata, metadata) + +define x86_fp80 @fadd_fp80(x86_fp80 %a, x86_fp80 %b) nounwind strictfp { +; X86-LABEL: fadd_fp80: +; X86: # %bb.0: +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: faddp %st, %st(1) +; X86-NEXT: retl +; +; X64-LABEL: fadd_fp80: +; X64: # %bb.0: +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: faddp %st, %st(1) +; X64-NEXT: retq + %ret = call x86_fp80 @llvm.experimental.constrained.fadd.x86_fp80(x86_fp80 %a, x86_fp80 %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret x86_fp80 %ret +} + +define x86_fp80 @fsub_fp80(x86_fp80 %a, x86_fp80 %b) nounwind strictfp { +; X86-LABEL: fsub_fp80: +; X86: # %bb.0: +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fsubp %st, %st(1) +; X86-NEXT: retl +; +; X64-LABEL: fsub_fp80: +; X64: # %bb.0: +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fsubp %st, %st(1) +; X64-NEXT: retq + %ret = call x86_fp80 @llvm.experimental.constrained.fsub.x86_fp80(x86_fp80 %a, x86_fp80 %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret x86_fp80 %ret +} + +define x86_fp80 @fmul_fp80(x86_fp80 %a, x86_fp80 %b) nounwind strictfp { +; X86-LABEL: fmul_fp80: +; X86: # %bb.0: +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fmulp %st, %st(1) +; X86-NEXT: retl +; +; X64-LABEL: fmul_fp80: +; X64: # %bb.0: +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fmulp %st, %st(1) +; X64-NEXT: retq + %ret = call x86_fp80 @llvm.experimental.constrained.fmul.x86_fp80(x86_fp80 %a, x86_fp80 %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret x86_fp80 %ret +} + +define x86_fp80 @fdiv_fp80(x86_fp80 %a, x86_fp80 %b) nounwind strictfp { +; X86-LABEL: fdiv_fp80: +; X86: # %bb.0: +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fdivp %st, %st(1) +; X86-NEXT: retl +; +; X64-LABEL: fdiv_fp80: +; X64: # %bb.0: +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fdivp %st, %st(1) +; X64-NEXT: retq + %ret = call x86_fp80 @llvm.experimental.constrained.fdiv.x86_fp80(x86_fp80 %a, x86_fp80 %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret x86_fp80 %ret +} + +define void @fpext_f32_to_fp80(float* %val, x86_fp80* %ret) nounwind strictfp { +; X86-LABEL: 
fpext_f32_to_fp80: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: flds (%ecx) +; X86-NEXT: fstpt (%eax) +; X86-NEXT: retl +; +; X64-LABEL: fpext_f32_to_fp80: +; X64: # %bb.0: +; X64-NEXT: flds (%rdi) +; X64-NEXT: fstpt (%rsi) +; X64-NEXT: retq + %1 = load float, float* %val, align 4 + %res = call x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f32(float %1, + metadata !"fpexcept.strict") #0 + store x86_fp80 %res, x86_fp80* %ret, align 16 + ret void +} + +define void @fpext_f64_to_fp80(double* %val, x86_fp80* %ret) nounwind strictfp { +; X86-LABEL: fpext_f64_to_fp80: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: fldl (%ecx) +; X86-NEXT: fstpt (%eax) +; X86-NEXT: retl +; +; X64-LABEL: fpext_f64_to_fp80: +; X64: # %bb.0: +; X64-NEXT: fldl (%rdi) +; X64-NEXT: fstpt (%rsi) +; X64-NEXT: retq + %1 = load double, double* %val, align 8 + %res = call x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f64(double %1, + metadata !"fpexcept.strict") #0 + store x86_fp80 %res, x86_fp80* %ret, align 16 + ret void +} + +define void @fptrunc_fp80_to_f32(x86_fp80* %val, float *%ret) nounwind strictfp { +; X86-LABEL: fptrunc_fp80_to_f32: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: fldt (%ecx) +; X86-NEXT: fstps (%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: fstps (%eax) +; X86-NEXT: popl %eax +; X86-NEXT: retl +; +; X64-LABEL: fptrunc_fp80_to_f32: +; X64: # %bb.0: +; X64-NEXT: fldt (%rdi) +; X64-NEXT: fstps -{{[0-9]+}}(%rsp) +; X64-NEXT: flds -{{[0-9]+}}(%rsp) +; X64-NEXT: fstps (%rsi) +; X64-NEXT: retq + %1 = load x86_fp80, x86_fp80* %val, align 16 + %res = call float @llvm.experimental.constrained.fptrunc.x86_fp80.f32(x86_fp80 %1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + store float %res, float* %ret, align 4 + ret void +} + +define void @fptrunc_fp80_to_f64(x86_fp80* %val, double* %ret) nounwind strictfp { +; X86-LABEL: fptrunc_fp80_to_f64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: movl 12(%ebp), %eax +; X86-NEXT: movl 8(%ebp), %ecx +; X86-NEXT: fldt (%ecx) +; X86-NEXT: fstpl (%esp) +; X86-NEXT: fldl (%esp) +; X86-NEXT: fstpl (%eax) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: fptrunc_fp80_to_f64: +; X64: # %bb.0: +; X64-NEXT: fldt (%rdi) +; X64-NEXT: fstpl -{{[0-9]+}}(%rsp) +; X64-NEXT: fldl -{{[0-9]+}}(%rsp) +; X64-NEXT: fstpl (%rsi) +; X64-NEXT: retq + %1 = load x86_fp80, x86_fp80* %val, align 16 + %res = call double @llvm.experimental.constrained.fptrunc.x86_fp80.f64(x86_fp80 %1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + store double %res, double* %ret, align 8 + ret void +} + +define void @fsqrt_fp80(x86_fp80* %a) nounwind strictfp { +; X86-LABEL: fsqrt_fp80: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: fldt (%eax) +; X86-NEXT: fsqrt +; X86-NEXT: fstpt (%eax) +; X86-NEXT: retl +; +; X64-LABEL: fsqrt_fp80: +; X64: # %bb.0: +; X64-NEXT: fldt (%rdi) +; X64-NEXT: fsqrt +; X64-NEXT: fstpt (%rdi) +; X64-NEXT: retq + %1 = load x86_fp80, x86_fp80* %a, align 16 + %res = call x86_fp80 @llvm.experimental.constrained.sqrt.x86_fp80(x86_fp80 %1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + store x86_fp80 %res, x86_fp80* %a, align 16 + ret void +} + +attributes 
#0 = { strictfp }
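
Reviewer note (illustrative, not part of the patch): every test added above follows the same constrained-intrinsic shape, so a single self-contained file is enough to try the codegen for one operation locally. A minimal sketch, assuming the default x86_64 SSE2 target; the function name and the hand-written CHECK lines below are illustrative rather than autogenerated:

; RUN: llc < %s -mtriple=x86_64-unknown-unknown -O3 | FileCheck %s

declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)

; CHECK-LABEL: fadd_f32_example:
; CHECK: addss %xmm1, %xmm0
define float @fadd_f32_example(float %a, float %b) nounwind strictfp {
  %ret = call float @llvm.experimental.constrained.fadd.f32(float %a, float %b,
                                                            metadata !"round.dynamic",
                                                            metadata !"fpexcept.strict") #0
  ret float %ret
}

attributes #0 = { strictfp }

The full CHECK blocks in the tests above can be regenerated with utils/update_llc_test_checks.py, as the header of the new fp80 file notes.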