1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-25 05:52:53 +02:00
llvm-mirror/test/CodeGen/AMDGPU/fneg.f64.ll
Matt Arsenault 2dfb6d03c5 AMDGPU: Run SIFoldOperands after PeepholeOptimizer
PeepholeOptimizer cleans up redundant copies, which makes
the operand folding more effective.

shader-db stats:

Totals:
SGPRS: 34200 -> 34336 (0.40 %)
VGPRS: 22118 -> 21655 (-2.09 %)
Code Size: 632144 -> 633460 (0.21 %) bytes
LDS: 11 -> 11 (0.00 %) blocks
Scratch: 10240 -> 11264 (10.00 %) bytes per wave
Max Waves: 8822 -> 8918 (1.09 %)
Wait states: 0 -> 0 (0.00 %)

Totals from affected shaders:
SGPRS: 7704 -> 7840 (1.77 %)
VGPRS: 5169 -> 4706 (-8.96 %)
Code Size: 234444 -> 235760 (0.56 %) bytes
LDS: 2 -> 2 (0.00 %) blocks
Scratch: 0 -> 1024 (0.00 %) bytes per wave
Max Waves: 1188 -> 1284 (8.08 %)
Wait states: 0 -> 0 (0.00 %)

Increases:
SGPRS: 35 (0.01 %)
VGPRS: 1 (0.00 %)
Code Size: 59 (0.02 %)
LDS: 0 (0.00 %)
Scratch: 1 (0.00 %)
Max Waves: 48 (0.02 %)
Wait states: 0 (0.00 %)

Decreases:
SGPRS: 26 (0.01 %)
VGPRS: 54 (0.02 %)
Code Size: 68 (0.03 %)
LDS: 0 (0.00 %)
Scratch: 0 (0.00 %)
Max Waves: 4 (0.00 %)
Wait states: 0 (0.00 %)

llvm-svn: 266378
2016-04-14 21:58:24 +00:00

61 lines
2.1 KiB
LLVM

; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}fneg_f64:
; GCN: v_xor_b32
define void @fneg_f64(double addrspace(1)* %out, double %in) {
%fneg = fsub double -0.000000e+00, %in
store double %fneg, double addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}fneg_v2f64:
; GCN: v_xor_b32
; GCN: v_xor_b32
define void @fneg_v2f64(<2 x double> addrspace(1)* nocapture %out, <2 x double> %in) {
%fneg = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %in
store <2 x double> %fneg, <2 x double> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}fneg_v4f64:
; R600: -PV
; R600: -T
; R600: -PV
; R600: -PV
; GCN: v_xor_b32
; GCN: v_xor_b32
; GCN: v_xor_b32
; GCN: v_xor_b32
define void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out, <4 x double> %in) {
%fneg = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %in
store <4 x double> %fneg, <4 x double> addrspace(1)* %out
ret void
}
; DAGCombiner will transform:
; (fneg (f64 bitcast (i64 a))) => (f64 bitcast (xor (i64 a), 0x80000000))
; unless the target returns true for isNegFree()
; FUNC-LABEL: {{^}}fneg_free_f64:
; GCN: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, -{{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) {
%bc = bitcast i64 %in to double
%fsub = fsub double 0.0, %bc
store double %fsub, double addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}fneg_fold_f64:
; SI: s_load_dwordx2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; VI: s_load_dwordx2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN-NOT: xor
; GCN: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], [[NEG_VALUE]]
define void @fneg_fold_f64(double addrspace(1)* %out, double %in) {
%fsub = fsub double -0.0, %in
%fmul = fmul double %fsub, %in
store double %fmul, double addrspace(1)* %out
ret void
}