1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00
llvm-mirror/test/CodeGen/ARM/pow.75.ll
Nemanja Ivanovic fd45134124 [DAGCombine] Optimize pow(X, 0.75) to sqrt(X) * sqrt(sqrt(X))
The sqrt case is faster and we already do this for the case where
the exponent is 0.25. This adds the 0.75 case which is also not
sensitive to signed zeros.

Patch by Whitney Tsang (Whitney)

Differential revision: https://reviews.llvm.org/D57434

llvm-svn: 353557
2019-02-08 19:50:58 +00:00

71 lines
5.5 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=thumbv8-linux-gnueabihf -mattr=neon -debug 2>&1 | FileCheck %s
; REQUIRES: asserts
declare float @llvm.pow.f32(float, float)
declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>)
declare double @llvm.pow.f64(double, double)
declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>)
define float @pow_f32_three_fourth_fmf(float %x) nounwind {
; CHECK: Combining: {{.*}}: f32 = fpow ninf nsz afn [[X:t[0-9]+]], ConstantFP:f32<7.500000e-01>
; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f32 = fsqrt ninf nsz afn [[X]]
; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f32 = fsqrt ninf nsz afn [[SQRT]]
; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f32 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]]
; CHECK-NEXT: ... into: [[R]]: f32 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]]
%r = call nsz ninf afn float @llvm.pow.f32(float %x, float 7.5e-01)
ret float %r
}
define double @pow_f64_three_fourth_fmf(double %x) nounwind {
; CHECK: Combining: {{.*}}: f64 = fpow ninf nsz afn t2, ConstantFP:f64<7.500000e-01>
; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f64 = fsqrt ninf nsz afn [[X]]
; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f64 = fsqrt ninf nsz afn [[SQRT]]
; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f64 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]]
; CHECK-NEXT: ... into: [[R]]: f64 = fmul ninf nsz afn [[SQRT]], [[SQRTSQRT]]
%r = call nsz ninf afn double @llvm.pow.f64(double %x, double 7.5e-01)
ret double %r
}
define <4 x float> @pow_v4f32_three_fourth_fmf(<4 x float> %x) nounwind {
; CHECK: Combining: {{.*}}: v4f32 = BUILD_VECTOR [[FORTH:t[0-9]+]], [[THIRD:t[0-9]+]], [[SECOND:t[0-9]+]], [[FIRST:t[0-9]+]]
; CHECK: Combining: [[FIRST]]: f32 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], ConstantFP:f32<7.500000e-01>
; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]]
; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]]
; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
; CHECK-NEXT: ... into: [[R]]: f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
; CHECK: Combining: [[SECOND]]: f32 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], ConstantFP:f32<7.500000e-01>
; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]]
; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]]
; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
; CHECK-NEXT: ... into: [[R]]: f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
; CHECK: Combining: [[THIRD]]: f32 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], ConstantFP:f32<7.500000e-01>
; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]]
; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]]
; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
; CHECK-NEXT: ... into: [[R]]: f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
; CHECK: Combining: [[FORTH]]: f32 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], ConstantFP:f32<7.500000e-01>
; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]]
; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f32 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]]
; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
; CHECK-NEXT: ... into: [[R]]: f32 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
%r = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> <float 7.5e-1, float 7.5e-1, float 7.5e-01, float 7.5e-01>)
ret <4 x float> %r
}
define <2 x double> @pow_v2f64_three_fourth_fmf(<2 x double> %x) nounwind {
; CHECK: Combining: {{.*}}: v2f64 = BUILD_VECTOR [[SECOND:t[0-9]+]], [[FIRST:t[0-9]+]]
; CHECK: Combining: [[FIRST]]: f64 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], ConstantFP:f64<7.500000e-01>
; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f64 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]]
; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f64 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]]
; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f64 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
; CHECK-NEXT: ... into: [[R]]: f64 = fmul nnan ninf nsz arcp contract afn reassoc t16, t17
; CHECK: Combining: [[SECOND]]: f64 = fpow nnan ninf nsz arcp contract afn reassoc [[X:t[0-9]+]], ConstantFP:f64<7.500000e-01>
; CHECK-NEXT: Creating new node: [[SQRT:t[0-9]+]]: f64 = fsqrt nnan ninf nsz arcp contract afn reassoc [[X]]
; CHECK-NEXT: Creating new node: [[SQRTSQRT:t[0-9]+]]: f64 = fsqrt nnan ninf nsz arcp contract afn reassoc [[SQRT]]
; CHECK-NEXT: Creating new node: [[R:t[0-9]+]]: f64 = fmul nnan ninf nsz arcp contract afn reassoc [[SQRT]], [[SQRTSQRT]]
; CHECK-NEXT: ... into: [[R]]: f64 = fmul nnan ninf nsz arcp contract afn reassoc t19, t20
%r = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 7.5e-1, double 7.5e-1>)
ret <2 x double> %r
}