mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
660643de83
Since P8 is the oldest machine supported by the MASSV pass, the _massv placeholder is removed and the oldest version of the MASSV functions is assumed. If P9 vector support is detected during compilation, the P8 suffix is updated to P9. Differential Revision: https://reviews.llvm.org/D98064
245 lines
9.3 KiB
LLVM
245 lines
9.3 KiB
LLVM
; Each invocation enables the shared CHECK prefix together with its
; CPU-specific prefix. With only the CPU-specific prefix enabled, every plain
; CHECK directive in this file (function labels, blr, xvrsqrtesp and the
; negative checks) would be silently skipped by FileCheck.
; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr9 | FileCheck -check-prefixes=CHECK,CHECK-PWR9 %s
; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr8 | FileCheck -check-prefixes=CHECK,CHECK-PWR8 %s
|
|
|
|
; Exponent is a variable: the base vector is loaded from %x and the exponent
; vector from %y, and the result is stored to %z. The checks expect the call
; to the CPU-specific __powf4 entry to be present in the output.
define void @vspow_var(float* nocapture %z, float* nocapture readonly %y, float* nocapture readonly %x) {
; CHECK-LABEL: @vspow_var
; CHECK-PWR9: bl __powf4_P9
; CHECK-PWR8: bl __powf4_P8
; CHECK: blr
entry:
  br label %vector.body

; Each trip processes four floats; the loop exits once the index reaches 1024.
vector.body:
  %iv = phi i64 [ %iv.next, %vector.body ], [ 0, %entry ]
  %dst.addr = getelementptr float, float* %z, i64 %iv
  %exp.addr = getelementptr float, float* %y, i64 %iv
  %base.addr = getelementptr float, float* %x, i64 %iv
  %base.vptr = bitcast float* %base.addr to <4 x float>*
  %base.vec = load <4 x float>, <4 x float>* %base.vptr, align 4
  %exp.vptr = bitcast float* %exp.addr to <4 x float>*
  %exp.vec = load <4 x float>, <4 x float>* %exp.vptr, align 4
  %pow.vec = call ninf afn nsz <4 x float> @__powf4_P8(<4 x float> %base.vec, <4 x float> %exp.vec)
  %dst.vptr = bitcast float* %dst.addr to <4 x float>*
  store <4 x float> %pow.vec, <4 x float>* %dst.vptr, align 4
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %for.end, label %vector.body

for.end:
  ret void
}
|
|
|
|
; Exponent is a splat constant that is neither 0.75 nor 0.25. The checks
; expect the call to the CPU-specific __powf4 entry to be present.
define void @vspow_const(float* nocapture %y, float* nocapture readonly %x) {
; CHECK-LABEL: @vspow_const
; CHECK-PWR9: bl __powf4_P9
; CHECK-PWR8: bl __powf4_P8
; CHECK: blr
entry:
  br label %vector.body

; Each trip processes four floats; the loop exits once the index reaches 1024.
vector.body:
  %iv = phi i64 [ %iv.next, %vector.body ], [ 0, %entry ]
  %dst.addr = getelementptr float, float* %y, i64 %iv
  %base.addr = getelementptr float, float* %x, i64 %iv
  %base.vptr = bitcast float* %base.addr to <4 x float>*
  %base.vec = load <4 x float>, <4 x float>* %base.vptr, align 4
  %pow.vec = call ninf afn nsz <4 x float> @__powf4_P8(<4 x float> %base.vec, <4 x float> <float 0x3FE851EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000>)
  %dst.vptr = bitcast float* %dst.addr to <4 x float>*
  store <4 x float> %pow.vec, <4 x float>* %dst.vptr, align 4
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %for.end, label %vector.body

for.end:
  ret void
}
|
|
|
|
; Exponent is a constant vector whose lanes are not all equal, and none of
; them is 0.75 or 0.25. The checks expect the call to the CPU-specific
; __powf4 entry to be present.
define void @vspow_neq_const(float* nocapture %y, float* nocapture readonly %x) {
; CHECK-LABEL: @vspow_neq_const
; CHECK-PWR9: bl __powf4_P9
; CHECK-PWR8: bl __powf4_P8
; CHECK: blr
entry:
  br label %vector.body

; Each trip processes four floats; the loop exits once the index reaches 1024.
vector.body:
  %iv = phi i64 [ %iv.next, %vector.body ], [ 0, %entry ]
  %dst.addr = getelementptr float, float* %y, i64 %iv
  %base.addr = getelementptr float, float* %x, i64 %iv
  %base.vptr = bitcast float* %base.addr to <4 x float>*
  %base.vec = load <4 x float>, <4 x float>* %base.vptr, align 4
  %pow.vec = call ninf afn nsz <4 x float> @__powf4_P8(<4 x float> %base.vec, <4 x float> <float 0x3FE861EB80000000, float 0x3FE871EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000>)
  %dst.vptr = bitcast float* %dst.addr to <4 x float>*
  store <4 x float> %pow.vec, <4 x float>* %dst.vptr, align 4
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %for.end, label %vector.body

for.end:
  ret void
}
|
|
|
|
; Exponent is a constant vector that is not a 0.75 splat: the first three
; lanes are 0.75 but the last lane holds a different constant. The checks
; expect the call to the CPU-specific __powf4 entry to be present.
define void @vspow_neq075_const(float* nocapture %y, float* nocapture readonly %x) {
; CHECK-LABEL: @vspow_neq075_const
; CHECK-PWR9: bl __powf4_P9
; CHECK-PWR8: bl __powf4_P8
; CHECK: blr
entry:
  br label %vector.body

; Each trip processes four floats; the loop exits once the index reaches 1024.
vector.body:
  %iv = phi i64 [ %iv.next, %vector.body ], [ 0, %entry ]
  %dst.addr = getelementptr float, float* %y, i64 %iv
  %base.addr = getelementptr float, float* %x, i64 %iv
  %base.vptr = bitcast float* %base.addr to <4 x float>*
  %base.vec = load <4 x float>, <4 x float>* %base.vptr, align 4
  %pow.vec = call ninf afn nsz <4 x float> @__powf4_P8(<4 x float> %base.vec, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 0x3FE851EB80000000>)
  %dst.vptr = bitcast float* %dst.addr to <4 x float>*
  store <4 x float> %pow.vec, <4 x float>* %dst.vptr, align 4
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %for.end, label %vector.body

for.end:
  ret void
}
|
|
|
|
; Exponent is a constant vector that is not a 0.25 splat: lanes alternate
; between a different constant and 0.25. The checks expect the call to the
; CPU-specific __powf4 entry to be present.
define void @vspow_neq025_const(float* nocapture %y, float* nocapture readonly %x) {
; CHECK-LABEL: @vspow_neq025_const
; CHECK-PWR9: bl __powf4_P9
; CHECK-PWR8: bl __powf4_P8
; CHECK: blr
entry:
  br label %vector.body

; Each trip processes four floats; the loop exits once the index reaches 1024.
vector.body:
  %iv = phi i64 [ %iv.next, %vector.body ], [ 0, %entry ]
  %dst.addr = getelementptr float, float* %y, i64 %iv
  %base.addr = getelementptr float, float* %x, i64 %iv
  %base.vptr = bitcast float* %base.addr to <4 x float>*
  %base.vec = load <4 x float>, <4 x float>* %base.vptr, align 4
  %pow.vec = call ninf afn nsz <4 x float> @__powf4_P8(<4 x float> %base.vec, <4 x float> <float 0x3FE851EB80000000, float 2.500000e-01, float 0x3FE851EB80000000, float 2.500000e-01>)
  %dst.vptr = bitcast float* %dst.addr to <4 x float>*
  store <4 x float> %pow.vec, <4 x float>* %dst.vptr, align 4
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %for.end, label %vector.body

for.end:
  ret void
}
|
|
|
|
; Exponent is a 0.75 splat. The checks expect no call to either __powf4
; variant and an xvrsqrtesp instruction in the output instead.
; NOTE(review): this call carries only ninf and afn (no nsz), unlike the call
; in @vspow_025; presumably deliberate - confirm before changing the flags.
define void @vspow_075(float* nocapture %y, float* nocapture readonly %x) {
; CHECK-LABEL: @vspow_075
; CHECK-NOT: bl __powf4_P{{[8,9]}}
; CHECK: xvrsqrtesp
; CHECK: blr
entry:
  br label %vector.body

; Each trip processes four floats; the loop exits once the index reaches 1024.
vector.body:
  %iv = phi i64 [ %iv.next, %vector.body ], [ 0, %entry ]
  %dst.addr = getelementptr float, float* %y, i64 %iv
  %base.addr = getelementptr float, float* %x, i64 %iv
  %base.vptr = bitcast float* %base.addr to <4 x float>*
  %base.vec = load <4 x float>, <4 x float>* %base.vptr, align 4
  %pow.vec = call ninf afn <4 x float> @__powf4_P8(<4 x float> %base.vec, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 7.500000e-01>)
  %dst.vptr = bitcast float* %dst.addr to <4 x float>*
  store <4 x float> %pow.vec, <4 x float>* %dst.vptr, align 4
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %for.end, label %vector.body

for.end:
  ret void
}
|
|
|
|
; Exponent is a 0.25 splat and the call carries ninf, afn and nsz. The checks
; expect no call to either __powf4 variant and an xvrsqrtesp instruction in
; the output instead.
define void @vspow_025(float* nocapture %y, float* nocapture readonly %x) {
; CHECK-LABEL: @vspow_025
; CHECK-NOT: bl __powf4_P{{[8,9]}}
; CHECK: xvrsqrtesp
; CHECK: blr
entry:
  br label %vector.body

; Each trip processes four floats; the loop exits once the index reaches 1024.
vector.body:
  %iv = phi i64 [ %iv.next, %vector.body ], [ 0, %entry ]
  %dst.addr = getelementptr float, float* %y, i64 %iv
  %base.addr = getelementptr float, float* %x, i64 %iv
  %base.vptr = bitcast float* %base.addr to <4 x float>*
  %base.vec = load <4 x float>, <4 x float>* %base.vptr, align 4
  %pow.vec = call ninf afn nsz <4 x float> @__powf4_P8(<4 x float> %base.vec, <4 x float> <float 2.500000e-01, float 2.500000e-01, float 2.500000e-01, float 2.500000e-01>)
  %dst.vptr = bitcast float* %dst.addr to <4 x float>*
  store <4 x float> %pow.vec, <4 x float>* %dst.vptr, align 4
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %for.end, label %vector.body

for.end:
  ret void
}
|
|
|
|
; Exponent is a 0.75 splat, but the call has no fast-math flags at all. The
; checks expect the call to the CPU-specific __powf4 entry to be present and
; no xvrsqrtesp between that call and the return.
define void @vspow_075_nofast(float* nocapture %y, float* nocapture readonly %x) {
; CHECK-LABEL: @vspow_075_nofast
; CHECK-PWR9: bl __powf4_P9
; CHECK-PWR8: bl __powf4_P8
; CHECK-NOT: xvrsqrtesp
; CHECK: blr
entry:
  br label %vector.body

; Each trip processes four floats; the loop exits once the index reaches 1024.
vector.body:
  %iv = phi i64 [ %iv.next, %vector.body ], [ 0, %entry ]
  %dst.addr = getelementptr float, float* %y, i64 %iv
  %base.addr = getelementptr float, float* %x, i64 %iv
  %base.vptr = bitcast float* %base.addr to <4 x float>*
  %base.vec = load <4 x float>, <4 x float>* %base.vptr, align 4
  %pow.vec = call <4 x float> @__powf4_P8(<4 x float> %base.vec, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 7.500000e-01>)
  %dst.vptr = bitcast float* %dst.addr to <4 x float>*
  store <4 x float> %pow.vec, <4 x float>* %dst.vptr, align 4
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %for.end, label %vector.body

for.end:
  ret void
}
|
|
|
|
; Exponent is a 0.25 splat, but the call has no fast-math flags at all. The
; checks expect the call to the CPU-specific __powf4 entry to be present and
; no xvrsqrtesp between that call and the return.
define void @vspow_025_nofast(float* nocapture %y, float* nocapture readonly %x) {
; CHECK-LABEL: @vspow_025_nofast
; CHECK-PWR9: bl __powf4_P9
; CHECK-PWR8: bl __powf4_P8
; CHECK-NOT: xvrsqrtesp
; CHECK: blr
entry:
  br label %vector.body

; Each trip processes four floats; the loop exits once the index reaches 1024.
vector.body:
  %iv = phi i64 [ %iv.next, %vector.body ], [ 0, %entry ]
  %dst.addr = getelementptr float, float* %y, i64 %iv
  %base.addr = getelementptr float, float* %x, i64 %iv
  %base.vptr = bitcast float* %base.addr to <4 x float>*
  %base.vec = load <4 x float>, <4 x float>* %base.vptr, align 4
  %pow.vec = call <4 x float> @__powf4_P8(<4 x float> %base.vec, <4 x float> <float 2.500000e-01, float 2.500000e-01, float 2.500000e-01, float 2.500000e-01>)
  %dst.vptr = bitcast float* %dst.addr to <4 x float>*
  store <4 x float> %pow.vec, <4 x float>* %dst.vptr, align 4
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %for.end, label %vector.body

for.end:
  ret void
}
|
|
|
|
; External vector pow entry called by every test function in this file. It
; takes two <4 x float> operands and returns a <4 x float>. The declaration
; deliberately carries no function attributes here.
declare <4 x float> @__powf4_P8(<4 x float>, <4 x float>)
|