mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[NVPTX] Add (1.0 / sqrt(x)) => rsqrt(x) generation when allowable by FP flags
llvm-svn: 185178
This commit is contained in:
parent
f17855a9dc
commit
434a514175
@@ -136,7 +136,8 @@ def hasLDG : Predicate<"Subtarget.hasLDG()">;
|
||||
def hasLDU : Predicate<"Subtarget.hasLDU()">;
|
||||
def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;
|
||||
|
||||
def doF32FTZ : Predicate<"UseF32FTZ">;
|
||||
def doF32FTZ : Predicate<"UseF32FTZ==1">;
|
||||
def doNoF32FTZ : Predicate<"UseF32FTZ==0">;
|
||||
|
||||
def doFMAF32 : Predicate<"doFMAF32">;
|
||||
def doFMAF32_ftz : Predicate<"(doFMAF32 && UseF32FTZ)">;
|
||||
@@ -765,6 +766,16 @@ def FDIV32ri_prec : NVPTXInst<(outs Float32Regs:$dst),
|
||||
(fdiv Float32Regs:$a, fpimm:$b))]>,
|
||||
Requires<[reqPTX20]>;
|
||||
|
||||
//
|
||||
// F32 rsqrt
|
||||
//
|
||||
|
||||
def RSQRTF32approx1r : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$b),
|
||||
"rsqrt.approx.f32 \t$dst, $b;", []>;
|
||||
|
||||
def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f Float32Regs:$b)),
|
||||
(RSQRTF32approx1r Float32Regs:$b)>,
|
||||
Requires<[do_DIVF32_FULL, do_SQRTF32_APPROX, doNoF32FTZ]>;
|
||||
|
||||
multiclass FPCONTRACT32<string OpcStr, Predicate Pred> {
|
||||
def rrr : NVPTXInst<(outs Float32Regs:$dst),
|
||||
|
13
test/CodeGen/NVPTX/rsqrt.ll
Normal file
13
test/CodeGen/NVPTX/rsqrt.ll
Normal file
@@ -0,0 +1,13 @@
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -nvptx-prec-divf32=1 -nvptx-prec-sqrtf32=0 | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
||||
|
||||
declare float @llvm.nvvm.sqrt.f(float)
|
||||
|
||||
define float @foo(float %a) {
|
||||
; CHECK: rsqrt.approx.f32
|
||||
%val = tail call float @llvm.nvvm.sqrt.f(float %a)
|
||||
%ret = fdiv float 1.0, %val
|
||||
ret float %ret
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user