; Mirror of https://github.com/RPCS3/llvm-mirror.git, synced 2024-11-22 18:54:02 +01:00, commit 18451cc4a4.
; Commit message: The motivation is that the update script has at least two deviations
; (`<...>@GOT`/`<...>@PLT`, and not hiding pointer arithmetic) from what pretty much all the
; check lines were generated with, and most of the tests are still not updated, so each time
; one of the non-up-to-date tests is updated to see the effect of the code change, there is
; a lot of noise. Instead of having to deal with that each time, let's just deal with
; everything at once. This has been done via:
;   cd llvm-project/llvm/test/CodeGen/X86
;   grep -rl "; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py" | xargs -L1 <...>/llvm-project/llvm/utils/update_llc_test_checks.py --llc-binary <...>/llvm-project/build/bin/llc
; Not all tests were regenerated, however.
; (File stats: 143 lines, 4.3 KiB, LLVM.)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s -check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s -check-prefix=X86

declare float @llvm.sqrt.f32(float %x);
; 1/sqrt(x) where both the sqrt call and the fdiv carry 'fast': x86-64 may use
; the rsqrtss estimate + Newton-Raphson refinement; i686 (x87) keeps fsqrt/fdivp.
define dso_local float @fast_recip_sqrt(float %x) {
; X64-LABEL: fast_recip_sqrt:
; X64:       # %bb.0:
; X64-NEXT:    rsqrtss %xmm0, %xmm1
; X64-NEXT:    mulss %xmm1, %xmm0
; X64-NEXT:    mulss %xmm1, %xmm0
; X64-NEXT:    addss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT:    mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-NEXT:    mulss %xmm1, %xmm0
; X64-NEXT:    retq
;
; X86-LABEL: fast_recip_sqrt:
; X86:       # %bb.0:
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    fsqrt
; X86-NEXT:    fld1
; X86-NEXT:    fdivp %st, %st(1)
; X86-NEXT:    retl
  %y = call fast float @llvm.sqrt.f32(float %x)
  %z = fdiv fast float 1.0, %y
  ret float %z
}
declare float @llvm.fmuladd.f32(float %a, float %b, float %c);
; fmuladd with 'fast': a*2 + a collapses to a single multiply by a
; constant-pool value on both targets (per the check lines below).
define dso_local float @fast_fmuladd_opts(float %a , float %b , float %c) {
; X64-LABEL: fast_fmuladd_opts:
; X64:       # %bb.0:
; X64-NEXT:    mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT:    retq
;
; X86-LABEL: fast_fmuladd_opts:
; X86:       # %bb.0:
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    fmuls {{\.?LCPI[0-9]+_[0-9]+}}
; X86-NEXT:    retl
  %res = call fast float @llvm.fmuladd.f32(float %a, float 2.0, float %a)
  ret float %res
}
; The multiply is strict.

@mul1 = common dso_local global double 0.000000e+00, align 4
; Only the fadd is fast; the fmul result is also stored, so the strict multiply
; must be kept (both targets emit two multiplies plus the store to @mul1).
define dso_local double @not_so_fast_mul_add(double %x) {
; X64-LABEL: not_so_fast_mul_add:
; X64:       # %bb.0:
; X64-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; X64-NEXT:    mulsd %xmm0, %xmm1
; X64-NEXT:    mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT:    movsd %xmm1, mul1(%rip)
; X64-NEXT:    retq
;
; X86-LABEL: not_so_fast_mul_add:
; X86:       # %bb.0:
; X86-NEXT:    fldl {{[0-9]+}}(%esp)
; X86-NEXT:    fld %st(0)
; X86-NEXT:    fmull {{\.?LCPI[0-9]+_[0-9]+}}
; X86-NEXT:    fxch %st(1)
; X86-NEXT:    fmull {{\.?LCPI[0-9]+_[0-9]+}}
; X86-NEXT:    fxch %st(1)
; X86-NEXT:    fstpl mul1
; X86-NEXT:    retl
  %m = fmul double %x, 4.2
  %a = fadd fast double %m, %x
  store double %m, double* @mul1, align 4
  ret double %a
}
; The sqrt is strict.

@sqrt1 = common dso_local global float 0.000000e+00, align 4
; The sqrt call itself is strict (no fast flag); only the fdiv is fast. The
; exact sqrt result is stored to @sqrt1, so x86-64 emits both a real sqrtss and
; the rsqrtss estimate sequence for the reciprocal.
define dso_local float @not_so_fast_recip_sqrt(float %x) {
; X64-LABEL: not_so_fast_recip_sqrt:
; X64:       # %bb.0:
; X64-NEXT:    rsqrtss %xmm0, %xmm1
; X64-NEXT:    sqrtss %xmm0, %xmm2
; X64-NEXT:    mulss %xmm1, %xmm0
; X64-NEXT:    mulss %xmm1, %xmm0
; X64-NEXT:    addss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT:    mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-NEXT:    mulss %xmm1, %xmm0
; X64-NEXT:    movss %xmm2, sqrt1(%rip)
; X64-NEXT:    retq
;
; X86-LABEL: not_so_fast_recip_sqrt:
; X86:       # %bb.0:
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    fsqrt
; X86-NEXT:    fld1
; X86-NEXT:    fdiv %st(1), %st
; X86-NEXT:    fxch %st(1)
; X86-NEXT:    fstps sqrt1
; X86-NEXT:    retl
  %y = call float @llvm.sqrt.f32(float %x)
  %z = fdiv fast float 1.0, %y
  store float %y, float* @sqrt1, align 4
  %ret = fadd float %z , 14.5 ; result unused; presumably here to give %z a second use — TODO confirm
  ret float %z
}
; fdiv by a constant with only 'arcp': the check lines show it lowered as a
; multiply by a constant-pool reciprocal, with half legalized through the
; __gnu_h2f_ieee/__gnu_f2h_ieee libcalls on both targets.
define dso_local float @div_arcp_by_const(half %x) {
; X64-LABEL: div_arcp_by_const:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rax
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    movzwl %di, %edi
; X64-NEXT:    callq __gnu_h2f_ieee@PLT
; X64-NEXT:    mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT:    callq __gnu_f2h_ieee@PLT
; X64-NEXT:    movzwl %ax, %edi
; X64-NEXT:    popq %rax
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    jmp __gnu_h2f_ieee@PLT # TAILCALL
;
; X86-LABEL: div_arcp_by_const:
; X86:       # %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    calll __gnu_h2f_ieee
; X86-NEXT:    fmuls {{\.?LCPI[0-9]+_[0-9]+}}
; X86-NEXT:    fstps (%esp)
; X86-NEXT:    calll __gnu_f2h_ieee
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    calll __gnu_h2f_ieee
; X86-NEXT:    popl %eax
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
  %rcp = fdiv arcp half %x, 10.0
  %z = fpext half %rcp to float
  ret float %z
}