mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 20:23:11 +01:00
e3fb38059f
Summary: Previously, if you had a function with the fast-math-enabled attr followed by a function without the fast-math attr, the second function would inherit the first function's fast-math-ness. This means that mixing fast-math and non-fast-math functions in a module was completely broken unless you explicitly annotated every non-fast-math function with "unsafe-fp-math"="false". This appears to have been broken since r176986 (March 2013), when the resetTargetOptions function was introduced. This patch tests the correct behavior as best we can. I don't think I can test FPDenormalMode and NoTrappingFPMath, because they aren't used in any backends during function lowering. Surprisingly, I also can't find any uses at all of LessPreciseFPMAD affecting generated code. The NVPTX/fast-math.ll test changes are an expected result of fixing this bug. When FMA is disabled, we emit add as "add.rn.f32", which prevents fma combining. Before this patch, fast-math was enabled in all functions following the one which explicitly enabled it on itself, so we were emitting plain "add.f32" where we should have generated "add.rn.f32". Reviewers: mkuper Subscribers: hfinkel, majnemer, jholewinski, nemanjai, llvm-commits Differential Revision: https://reviews.llvm.org/D28507 llvm-svn: 291618 |
||
---|---|---|
.. | ||
access-non-generic.ll | ||
add-128bit.ll | ||
addrspacecast-gvar.ll | ||
addrspacecast.ll | ||
aggr-param.ll | ||
aggregate-return.ll | ||
alias.ll | ||
annotations.ll | ||
arg-lowering.ll | ||
arithmetic-fp-sm20.ll | ||
arithmetic-int.ll | ||
atomics-with-scope.ll | ||
atomics.ll | ||
bfe.ll | ||
branch-fold.ll | ||
bug17709.ll | ||
bug21465.ll | ||
bug22246.ll | ||
bug22322.ll | ||
bug26185-2.ll | ||
bug26185.ll | ||
bypass-div.ll | ||
call-with-alloca-buffer.ll | ||
callchain.ll | ||
calling-conv.ll | ||
combine-min-max.ll | ||
compare-int.ll | ||
constant-vectors.ll | ||
convergent-mir-call.ll | ||
convert-fp.ll | ||
convert-int-sm20.ll | ||
ctlz.ll | ||
ctpop.ll | ||
cttz.ll | ||
debug-file-loc.ll | ||
disable-opt.ll | ||
div-ri.ll | ||
divrem-combine.ll | ||
envreg.ll | ||
extloadv.ll | ||
fast-math.ll | ||
fma-assoc.ll | ||
fma-disable.ll | ||
fma.ll | ||
fp16.ll | ||
fp-contract.ll | ||
fp-literals.ll | ||
function-align.ll | ||
generic-to-nvvm-ir.ll | ||
generic-to-nvvm.ll | ||
global-addrspace.ll | ||
global-ctor-empty.ll | ||
global-ctor.ll | ||
global-dtor.ll | ||
global-ordering.ll | ||
global-visibility.ll | ||
globals_init.ll | ||
globals_lowering.ll | ||
gvar-init.ll | ||
half.ll | ||
i1-global.ll | ||
i1-int-to-fp.ll | ||
i1-param.ll | ||
i8-param.ll | ||
imad.ll | ||
implicit-def.ll | ||
inline-asm.ll | ||
intrin-nocapture.ll | ||
intrinsic-old.ll | ||
intrinsics.ll | ||
isspacep.ll | ||
ld-addrspace.ll | ||
ld-generic.ll | ||
ldg-invariant.ll | ||
ldparam-v4.ll | ||
ldu-i8.ll | ||
ldu-ldg.ll | ||
ldu-reg-plus-offset.ll | ||
lit.local.cfg | ||
load-sext-i1.ll | ||
load-with-non-coherent-cache.ll | ||
LoadStoreVectorizer.ll | ||
local-stack-frame.ll | ||
loop-vectorize.ll | ||
lower-aggr-copies.ll | ||
lower-alloca.ll | ||
lower-kernel-ptr-arg.ll | ||
machine-sink.ll | ||
MachineSink-call.ll | ||
MachineSink-convergent.ll | ||
managed.ll | ||
math-intrins.ll | ||
misaligned-vector-ldst.ll | ||
module-inline-asm.ll | ||
mulwide.ll | ||
noduplicate-syncthreads.ll | ||
nounroll.ll | ||
nvcl-param-align.ll | ||
nvvm-reflect-module-flag.ll | ||
nvvm-reflect.ll | ||
param-align.ll | ||
pr13291-i1-store.ll | ||
pr16278.ll | ||
pr17529.ll | ||
refl1.ll | ||
reg-copy.ll | ||
reg-types.ll | ||
rotate.ll | ||
rsqrt.ll | ||
sched1.ll | ||
sched2.ll | ||
sext-in-reg.ll | ||
sext-params.ll | ||
shfl.ll | ||
shift-parts.ll | ||
simple-call.ll | ||
sm-version-20.ll | ||
sm-version-21.ll | ||
sm-version-30.ll | ||
sm-version-32.ll | ||
sm-version-35.ll | ||
sm-version-37.ll | ||
sm-version-50.ll | ||
sm-version-52.ll | ||
sm-version-53.ll | ||
sm-version-60.ll | ||
sm-version-61.ll | ||
sm-version-62.ll | ||
speculative-execution-divergent-target.ll | ||
st-addrspace.ll | ||
st-generic.ll | ||
surf-read-cuda.ll | ||
surf-read.ll | ||
surf-write-cuda.ll | ||
surf-write.ll | ||
symbol-naming.ll | ||
TailDuplication-convergent.ll | ||
tex-read-cuda.ll | ||
tex-read.ll | ||
texsurf-queries.ll | ||
tid-range.ll | ||
tuple-literal.ll | ||
vec8.ll | ||
vec-param-load.ll | ||
vector-args.ll | ||
vector-call.ll | ||
vector-compare.ll | ||
vector-global.ll | ||
vector-loads.ll | ||
vector-select.ll | ||
vector-stores.ll | ||
weak-global.ll | ||
weak-linkage.ll | ||
zero-cs.ll | ||
zeroext-32bit.ll |