mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
3021ef095b
We now consider the FPOpFusion flag when determining whether to fuse ops. We also explicitly emit add.rn when fusion is disabled to prevent ptxas from fusing the operations on its own. llvm-svn: 213287
34 lines
1.1 KiB
LLVM
34 lines
1.1 KiB
LLVM
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -fp-contract=fast | FileCheck %s --check-prefix=FAST
; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 | FileCheck %s --check-prefix=DEFAULT

; The first invocation passes -fp-contract=fast and is verified with the FAST
; check prefix; the second passes no contraction flag and is verified with the
; DEFAULT check prefix.

target triple = "nvptx64-unknown-cuda"
|
|
|
|
;; Make sure we are generating proper instruction sequences for fused ops.
;; If fusion is allowed, we try to form fma.rn at the PTX level, and emit
;; add.f32 otherwise. Without an explicit rounding mode on add.f32, ptxas
;; is free to fuse with a multiply if it is able. If fusion is not allowed,
;; we do not form fma.rn at the PTX level and explicitly generate add.rn
;; for all adds to prevent ptxas from fusing the ops.
|
|
|
|
;; t0 returns (a * b) + c: a multiply whose result feeds an add, which is the
;; pattern that can be contracted into a single fused multiply-add.
;; FAST-LABEL: @t0
;; DEFAULT-LABEL: @t0
define float @t0(float %a, float %b, float %c) {
;; In the run with -fp-contract=fast, the multiply and add are expected to be
;; emitted as one fma.rn.f32.
;; FAST: fma.rn.f32
;; In the run without that flag, the two operations must stay separate, each
;; carrying an explicit .rn rounding mode.
;; DEFAULT: mul.rn.f32
;; DEFAULT: add.rn.f32
  %prod = fmul float %a, %b
  %sum = fadd float %prod, %c
  ret float %sum
}
|
|
|
|
;; t1 returns a + b: a lone add with no multiply feeding it.
;; FAST-LABEL: @t1
;; DEFAULT-LABEL: @t1
define float @t1(float %a, float %b) {
;; We cannot form an fma here. When fusion is allowed (the run with
;; -fp-contract=fast) a plain add.f32 is expected. When fusion is not allowed,
;; make sure we explicitly emit add.rn.f32 to prevent ptxas from fusing this
;; with anything else.
;; FAST: add.f32
;; DEFAULT: add.rn.f32
  %sum = fadd float %a, %b
  ret float %sum
}
|