mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
afea5f661f
ISD::FROUND is defined to round to nearest with ties rounding away from 0. This mode isn't supported in hardware on X86. But as long as we aren't compiling with trapping math, we can emulate this with floor(X + copysign(nextafter(0.5, 0.0), X)). We have to use nextafter to avoid some corner cases that adding 0.5 would have. For example, if X is nextafter(0.5, 0.0) it should round to 0.0, but adding 0.5 would need one extra bit of mantissa than can be stored so it rounds to 1.0. Adding nextafter(0.5, 0.0) instead will just increase the exponent by 1 and leave the mantissa as all 1s. This would be nextafter(1.0, 0.0) which will floor to 0.0. Techically this requires -fno-trapping-math which isn't our default. But if we care about exceptions we should be using constrained intrinsics. Constrained intrinsics would use STRICT_FROUND which won't go through this code. Fixes PR42195. Differential Revision: https://reviews.llvm.org/D73607
416 lines
16 KiB
LLVM
416 lines
16 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx | FileCheck %s
|
|
|
|
; PR38527 - https://bugs.llvm.org/show_bug.cgi?id=38527
|
|
|
|
; Use an AVX target to show that the potential problem
|
|
; is not limited to 128-bit types/registers. Ie, widening
|
|
; up to 256-bits may also result in bogus libcalls.
|
|
|
|
; Use fsin as the representative test for various data types.
|
|
|
|
declare <1 x float> @llvm.sin.v1f32(<1 x float>)
|
|
declare <2 x float> @llvm.sin.v2f32(<2 x float>)
|
|
declare <3 x float> @llvm.sin.v3f32(<3 x float>)
|
|
declare <4 x float> @llvm.sin.v4f32(<4 x float>)
|
|
declare <5 x float> @llvm.sin.v5f32(<5 x float>)
|
|
declare <6 x float> @llvm.sin.v6f32(<6 x float>)
|
|
declare <3 x double> @llvm.sin.v3f64(<3 x double>)
|
|
|
|
; Verify that all of the potential libcall candidates are handled.
|
|
; Some of these have custom lowering, so those cases won't have
|
|
; libcalls.
|
|
|
|
declare <2 x float> @llvm.fabs.v2f32(<2 x float>)
|
|
declare <2 x float> @llvm.ceil.v2f32(<2 x float>)
|
|
declare <2 x float> @llvm.cos.v2f32(<2 x float>)
|
|
declare <2 x float> @llvm.exp.v2f32(<2 x float>)
|
|
declare <2 x float> @llvm.exp2.v2f32(<2 x float>)
|
|
declare <2 x float> @llvm.floor.v2f32(<2 x float>)
|
|
declare <2 x float> @llvm.log.v2f32(<2 x float>)
|
|
declare <2 x float> @llvm.log10.v2f32(<2 x float>)
|
|
declare <2 x float> @llvm.log2.v2f32(<2 x float>)
|
|
declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>)
|
|
declare <2 x float> @llvm.rint.v2f32(<2 x float>)
|
|
declare <2 x float> @llvm.round.v2f32(<2 x float>)
|
|
declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
|
|
declare <2 x float> @llvm.trunc.v2f32(<2 x float>)
|
|
|
|
define <1 x float> @sin_v1f32(<1 x float> %x) nounwind {
|
|
; CHECK-LABEL: sin_v1f32:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: pushq %rax
|
|
; CHECK-NEXT: callq sinf
|
|
; CHECK-NEXT: popq %rax
|
|
; CHECK-NEXT: retq
|
|
%r = call <1 x float> @llvm.sin.v1f32(<1 x float> %x)
|
|
ret <1 x float> %r
|
|
}
|
|
|
|
define <2 x float> @sin_v2f32(<2 x float> %x) nounwind {
|
|
; CHECK-LABEL: sin_v2f32:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: subq $40, %rsp
|
|
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
; CHECK-NEXT: callq sinf
|
|
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
|
|
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
|
|
; CHECK-NEXT: callq sinf
|
|
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
|
|
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
|
|
; CHECK-NEXT: addq $40, %rsp
|
|
; CHECK-NEXT: retq
|
|
%r = call <2 x float> @llvm.sin.v2f32(<2 x float> %x)
|
|
ret <2 x float> %r
|
|
}
|
|
|
|
define <3 x float> @sin_v3f32(<3 x float> %x) nounwind {
|
|
; CHECK-LABEL: sin_v3f32:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: subq $40, %rsp
|
|
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
; CHECK-NEXT: callq sinf
|
|
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
|
|
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
|
|
; CHECK-NEXT: callq sinf
|
|
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
|
|
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
|
|
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
|
|
; CHECK-NEXT: # xmm0 = mem[1,0]
|
|
; CHECK-NEXT: callq sinf
|
|
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
|
|
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
|
|
; CHECK-NEXT: addq $40, %rsp
|
|
; CHECK-NEXT: retq
|
|
%r = call <3 x float> @llvm.sin.v3f32(<3 x float> %x)
|
|
ret <3 x float> %r
|
|
}
|
|
|
|
define <4 x float> @sin_v4f32(<4 x float> %x) nounwind {
|
|
; CHECK-LABEL: sin_v4f32:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: subq $40, %rsp
|
|
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
; CHECK-NEXT: callq sinf
|
|
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
|
|
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
|
|
; CHECK-NEXT: callq sinf
|
|
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
|
|
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
|
|
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
|
|
; CHECK-NEXT: # xmm0 = mem[1,0]
|
|
; CHECK-NEXT: callq sinf
|
|
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
|
|
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
|
|
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
; CHECK-NEXT: vpermilps $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
|
|
; CHECK-NEXT: # xmm0 = mem[3,1,2,3]
|
|
; CHECK-NEXT: callq sinf
|
|
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
|
|
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
|
|
; CHECK-NEXT: addq $40, %rsp
|
|
; CHECK-NEXT: retq
|
|
%r = call <4 x float> @llvm.sin.v4f32(<4 x float> %x)
|
|
ret <4 x float> %r
|
|
}
|
|
|
|
define <5 x float> @sin_v5f32(<5 x float> %x) nounwind {
|
|
; CHECK-LABEL: sin_v5f32:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: subq $88, %rsp
|
|
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
|
|
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
|
; CHECK-NEXT: vzeroupper
|
|
; CHECK-NEXT: callq sinf
|
|
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
|
|
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
|
|
; CHECK-NEXT: callq sinf
|
|
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
|
|
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
|
|
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
|
|
; CHECK-NEXT: # xmm0 = mem[1,0]
|
|
; CHECK-NEXT: callq sinf
|
|
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
|
|
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
|
|
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
; CHECK-NEXT: vpermilps $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
|
|
; CHECK-NEXT: # xmm0 = mem[3,1,2,3]
|
|
; CHECK-NEXT: callq sinf
|
|
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
|
|
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
|
|
; CHECK-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
|
|
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
|
|
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
; CHECK-NEXT: vzeroupper
|
|
; CHECK-NEXT: callq sinf
|
|
; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
|
|
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
|
; CHECK-NEXT: addq $88, %rsp
|
|
; CHECK-NEXT: retq
|
|
%r = call <5 x float> @llvm.sin.v5f32(<5 x float> %x)
|
|
ret <5 x float> %r
|
|
}
|
|
|
|
define <6 x float> @sin_v6f32(<6 x float> %x) nounwind {
|
|
; CHECK-LABEL: sin_v6f32:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: subq $88, %rsp
|
|
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
|
|
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
; CHECK-NEXT: vzeroupper
|
|
; CHECK-NEXT: callq sinf
|
|
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
|
|
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
|
|
; CHECK-NEXT: callq sinf
|
|
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
|
|
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
|
|
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
|
|
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
|
; CHECK-NEXT: vzeroupper
|
|
; CHECK-NEXT: callq sinf
|
|
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
|
|
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
|
|
; CHECK-NEXT: callq sinf
|
|
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
|
|
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
|
|
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
|
|
; CHECK-NEXT: # xmm0 = mem[1,0]
|
|
; CHECK-NEXT: callq sinf
|
|
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
|
|
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
|
|
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
; CHECK-NEXT: vpermilps $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
|
|
; CHECK-NEXT: # xmm0 = mem[3,1,2,3]
|
|
; CHECK-NEXT: callq sinf
|
|
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
|
|
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
|
|
; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
|
|
; CHECK-NEXT: addq $88, %rsp
|
|
; CHECK-NEXT: retq
|
|
%r = call <6 x float> @llvm.sin.v6f32(<6 x float> %x)
|
|
ret <6 x float> %r
|
|
}
|
|
|
|
define <3 x double> @sin_v3f64(<3 x double> %x) nounwind {
|
|
; CHECK-LABEL: sin_v3f64:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: subq $88, %rsp
|
|
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
|
|
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
|
; CHECK-NEXT: vzeroupper
|
|
; CHECK-NEXT: callq sin
|
|
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
|
|
; CHECK-NEXT: # xmm0 = mem[1,0]
|
|
; CHECK-NEXT: callq sin
|
|
; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload
|
|
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
|
; CHECK-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
|
|
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
|
|
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
; CHECK-NEXT: vzeroupper
|
|
; CHECK-NEXT: callq sin
|
|
; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
|
|
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
|
; CHECK-NEXT: addq $88, %rsp
|
|
; CHECK-NEXT: retq
|
|
%r = call <3 x double> @llvm.sin.v3f64(<3 x double> %x)
|
|
ret <3 x double> %r
|
|
}
|
|
|
|
define <2 x float> @fabs_v2f32(<2 x float> %x) nounwind {
|
|
; CHECK-LABEL: fabs_v2f32:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
|
|
; CHECK-NEXT: retq
|
|
%r = call <2 x float> @llvm.fabs.v2f32(<2 x float> %x)
|
|
ret <2 x float> %r
|
|
}
|
|
|
|
define <2 x float> @ceil_v2f32(<2 x float> %x) nounwind {
|
|
; CHECK-LABEL: ceil_v2f32:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: vroundps $10, %xmm0, %xmm0
|
|
; CHECK-NEXT: retq
|
|
%r = call <2 x float> @llvm.ceil.v2f32(<2 x float> %x)
|
|
ret <2 x float> %r
|
|
}
|
|
|
|
define <2 x float> @cos_v2f32(<2 x float> %x) nounwind {
|
|
; CHECK-LABEL: cos_v2f32:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: subq $40, %rsp
|
|
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
; CHECK-NEXT: callq cosf
|
|
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
|
|
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
|
|
; CHECK-NEXT: callq cosf
|
|
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
|
|
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
|
|
; CHECK-NEXT: addq $40, %rsp
|
|
; CHECK-NEXT: retq
|
|
%r = call <2 x float> @llvm.cos.v2f32(<2 x float> %x)
|
|
ret <2 x float> %r
|
|
}
|
|
|
|
define <2 x float> @exp_v2f32(<2 x float> %x) nounwind {
|
|
; CHECK-LABEL: exp_v2f32:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: subq $40, %rsp
|
|
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
; CHECK-NEXT: callq expf
|
|
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
|
|
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
|
|
; CHECK-NEXT: callq expf
|
|
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
|
|
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
|
|
; CHECK-NEXT: addq $40, %rsp
|
|
; CHECK-NEXT: retq
|
|
%r = call <2 x float> @llvm.exp.v2f32(<2 x float> %x)
|
|
ret <2 x float> %r
|
|
}
|
|
|
|
define <2 x float> @exp2_v2f32(<2 x float> %x) nounwind {
|
|
; CHECK-LABEL: exp2_v2f32:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: subq $40, %rsp
|
|
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
; CHECK-NEXT: callq exp2f
|
|
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
|
|
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
|
|
; CHECK-NEXT: callq exp2f
|
|
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
|
|
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
|
|
; CHECK-NEXT: addq $40, %rsp
|
|
; CHECK-NEXT: retq
|
|
%r = call <2 x float> @llvm.exp2.v2f32(<2 x float> %x)
|
|
ret <2 x float> %r
|
|
}
|
|
|
|
define <2 x float> @floor_v2f32(<2 x float> %x) nounwind {
|
|
; CHECK-LABEL: floor_v2f32:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: vroundps $9, %xmm0, %xmm0
|
|
; CHECK-NEXT: retq
|
|
%r = call <2 x float> @llvm.floor.v2f32(<2 x float> %x)
|
|
ret <2 x float> %r
|
|
}
|
|
|
|
define <2 x float> @log_v2f32(<2 x float> %x) nounwind {
|
|
; CHECK-LABEL: log_v2f32:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: subq $40, %rsp
|
|
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
; CHECK-NEXT: callq logf
|
|
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
|
|
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
|
|
; CHECK-NEXT: callq logf
|
|
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
|
|
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
|
|
; CHECK-NEXT: addq $40, %rsp
|
|
; CHECK-NEXT: retq
|
|
%r = call <2 x float> @llvm.log.v2f32(<2 x float> %x)
|
|
ret <2 x float> %r
|
|
}
|
|
|
|
define <2 x float> @log10_v2f32(<2 x float> %x) nounwind {
|
|
; CHECK-LABEL: log10_v2f32:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: subq $40, %rsp
|
|
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
; CHECK-NEXT: callq log10f
|
|
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
|
|
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
|
|
; CHECK-NEXT: callq log10f
|
|
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
|
|
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
|
|
; CHECK-NEXT: addq $40, %rsp
|
|
; CHECK-NEXT: retq
|
|
%r = call <2 x float> @llvm.log10.v2f32(<2 x float> %x)
|
|
ret <2 x float> %r
|
|
}
|
|
|
|
define <2 x float> @log2_v2f32(<2 x float> %x) nounwind {
|
|
; CHECK-LABEL: log2_v2f32:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: subq $40, %rsp
|
|
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
|
|
; CHECK-NEXT: callq log2f
|
|
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
|
; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
|
|
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
|
|
; CHECK-NEXT: callq log2f
|
|
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
|
|
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
|
|
; CHECK-NEXT: addq $40, %rsp
|
|
; CHECK-NEXT: retq
|
|
%r = call <2 x float> @llvm.log2.v2f32(<2 x float> %x)
|
|
ret <2 x float> %r
|
|
}
|
|
|
|
define <2 x float> @nearbyint__v2f32(<2 x float> %x) nounwind {
|
|
; CHECK-LABEL: nearbyint__v2f32:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: vroundps $12, %xmm0, %xmm0
|
|
; CHECK-NEXT: retq
|
|
%r = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %x)
|
|
ret <2 x float> %r
|
|
}
|
|
|
|
define <2 x float> @rint_v2f32(<2 x float> %x) nounwind {
|
|
; CHECK-LABEL: rint_v2f32:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: vroundps $4, %xmm0, %xmm0
|
|
; CHECK-NEXT: retq
|
|
%r = call <2 x float> @llvm.rint.v2f32(<2 x float> %x)
|
|
ret <2 x float> %r
|
|
}
|
|
|
|
define <2 x float> @round_v2f32(<2 x float> %x) nounwind {
|
|
; CHECK-LABEL: round_v2f32:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm1
|
|
; CHECK-NEXT: vorps {{.*}}(%rip), %xmm1, %xmm1
|
|
; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
|
|
; CHECK-NEXT: vroundps $11, %xmm0, %xmm0
|
|
; CHECK-NEXT: retq
|
|
%r = call <2 x float> @llvm.round.v2f32(<2 x float> %x)
|
|
ret <2 x float> %r
|
|
}
|
|
|
|
define <2 x float> @sqrt_v2f32(<2 x float> %x) nounwind {
|
|
; CHECK-LABEL: sqrt_v2f32:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: vsqrtps %xmm0, %xmm0
|
|
; CHECK-NEXT: retq
|
|
%r = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
|
|
ret <2 x float> %r
|
|
}
|
|
|
|
define <2 x float> @trunc_v2f32(<2 x float> %x) nounwind {
|
|
; CHECK-LABEL: trunc_v2f32:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: vroundps $11, %xmm0, %xmm0
|
|
; CHECK-NEXT: retq
|
|
%r = call <2 x float> @llvm.trunc.v2f32(<2 x float> %x)
|
|
ret <2 x float> %r
|
|
}
|
|
|