mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 13:11:39 +01:00
dd292a30dc
Detailed description: After https://reviews.llvm.org/D59990 was submitted, several issues were discovered. The changes in common code were preserved, but the AMDGPU-specific part was reverted to keep the backend working correctly. The discovered issues were addressed in the following commits: https://reviews.llvm.org/D67662 https://reviews.llvm.org/D67101 https://reviews.llvm.org/D63953 https://reviews.llvm.org/D63731 This change brings back the AMDGPU-specific changes. Reviewed by: rampitec, arsenm Differential Revision: https://reviews.llvm.org/D68635 llvm-svn: 374767
462 lines
21 KiB
LLVM
462 lines
21 KiB
LLVM
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+fp32-denormals < %s | FileCheck --check-prefixes=GCN,GCN-DENORM %s
|
|
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals < %s | FileCheck --check-prefixes=GCN,GCN-FLUSH %s
|
|
|
|
; GCN-LABEL: {{^}}div_1_by_x_25ulp:
|
|
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
|
|
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
|
|
; GCN-DAG: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9:]+}}], 0x0{{$}}
|
|
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |[[VAL]]|, [[L]]
|
|
; GCN-DENORM-DAG: v_cndmask_b32_e32 [[SCALE:v[0-9]+]], 1.0, [[S]], vcc
|
|
; GCN-DENORM: v_mul_f32_e32 [[PRESCALED:v[0-9]+]], [[VAL]], [[SCALE]]
|
|
; GCN-DENORM: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[PRESCALED]]
|
|
; GCN-DENORM: v_mul_f32_e32 [[OUT:v[0-9]+]], [[SCALE]], [[RCP]]
|
|
|
|
; GCN-FLUSH: v_rcp_f32_e32 [[OUT:v[0-9]+]], [[VAL]]
|
|
|
|
; GCN: global_store_dword v[{{[0-9:]+}}], [[OUT]], off
|
|
; Kernel under test: loads a float from %arg, computes 1.0 / x with the
; division tagged !fpmath !0, and stores the quotient back to %arg.
define amdgpu_kernel void @div_1_by_x_25ulp(float addrspace(1)* %arg) {
  %x = load float, float addrspace(1)* %arg, align 4
  %recip = fdiv float 1.000000e+00, %x, !fpmath !0
  store float %recip, float addrspace(1)* %arg, align 4
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}div_minus_1_by_x_25ulp:
|
|
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
|
|
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
|
|
; GCN-DAG: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9:]+}}], 0x0{{$}}
|
|
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |[[VAL]]|, [[L]]
|
|
; GCN-DENORM-DAG: v_cndmask_b32_e32 [[SCALE:v[0-9]+]], 1.0, [[S]], vcc
|
|
; GCN-DENORM: v_mul_f32_e64 [[PRESCALED:v[0-9]+]], [[VAL]], -[[SCALE]]
|
|
; GCN-DENORM: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[PRESCALED]]
|
|
; GCN-DENORM: v_mul_f32_e32 [[OUT:v[0-9]+]], [[SCALE]], [[RCP]]
|
|
|
|
; GCN-FLUSH: v_rcp_f32_e64 [[OUT:v[0-9]+]], -[[VAL]]
|
|
|
|
; GCN: global_store_dword v[{{[0-9:]+}}], [[OUT]], off
|
|
; Kernel under test: loads a float from %arg, computes -1.0 / x with the
; division tagged !fpmath !0, and stores the quotient back to %arg.
define amdgpu_kernel void @div_minus_1_by_x_25ulp(float addrspace(1)* %arg) {
  %x = load float, float addrspace(1)* %arg, align 4
  %quot = fdiv float -1.000000e+00, %x, !fpmath !0
  store float %quot, float addrspace(1)* %arg, align 4
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}div_1_by_minus_x_25ulp:
|
|
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
|
|
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
|
|
; GCN-DAG: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9:]+}}], 0x0{{$}}
|
|
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |[[VAL]]|, [[L]]
|
|
; GCN-DENORM-DAG: v_cndmask_b32_e32 [[SCALE:v[0-9]+]], 1.0, [[S]], vcc
|
|
; GCN-DENORM: v_mul_f32_e64 [[PRESCALED:v[0-9]+]], -[[VAL]], [[SCALE]]
|
|
; GCN-DENORM: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[PRESCALED]]
|
|
; GCN-DENORM: v_mul_f32_e32 [[OUT:v[0-9]+]], [[SCALE]], [[RCP]]
|
|
|
|
; GCN-FLUSH: v_rcp_f32_e64 [[OUT:v[0-9]+]], -[[VAL]]
|
|
|
|
; GCN: global_store_dword v[{{[0-9:]+}}], [[OUT]], off
|
|
; Kernel under test: loads a float from %arg, negates it (written as
; -0.0 - x), computes 1.0 / (-x) with the division tagged !fpmath !0, and
; stores the quotient back to %arg.
define amdgpu_kernel void @div_1_by_minus_x_25ulp(float addrspace(1)* %arg) {
  %x = load float, float addrspace(1)* %arg, align 4
  %negx = fsub float -0.000000e+00, %x
  %quot = fdiv float 1.000000e+00, %negx, !fpmath !0
  store float %quot, float addrspace(1)* %arg, align 4
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}div_minus_1_by_minus_x_25ulp:
|
|
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
|
|
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
|
|
; GCN-DAG: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9:]+}}], 0x0{{$}}
|
|
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |[[VAL]]|, [[L]]
|
|
; GCN-DENORM-DAG: v_cndmask_b32_e32 [[SCALE:v[0-9]+]], 1.0, [[S]], vcc
|
|
; GCN-DENORM: v_mul_f32_e32 [[PRESCALED:v[0-9]+]], [[VAL]], [[SCALE]]
|
|
; GCN-DENORM: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[PRESCALED]]
|
|
; GCN-DENORM: v_mul_f32_e32 [[OUT:v[0-9]+]], [[SCALE]], [[RCP]]
|
|
|
|
; GCN-FLUSH: v_rcp_f32_e32 [[OUT:v[0-9]+]], [[VAL]]
|
|
|
|
; GCN: global_store_dword v[{{[0-9:]+}}], [[OUT]], off
|
|
; Kernel under test: loads a float from %arg, negates it (written as
; -0.0 - x), computes -1.0 / (-x) with the division tagged !fpmath !0, and
; stores the quotient back to %arg.
define amdgpu_kernel void @div_minus_1_by_minus_x_25ulp(float addrspace(1)* %arg) {
  %x = load float, float addrspace(1)* %arg, align 4
  %negx = fsub float -0.000000e+00, %x
  %quot = fdiv float -1.000000e+00, %negx, !fpmath !0
  store float %quot, float addrspace(1)* %arg, align 4
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}div_v4_1_by_x_25ulp:
|
|
; GCN-DAG: s_load_dwordx4 s{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
|
|
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
|
|
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
|
|
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
|
|
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
|
|
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
|
|
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
|
|
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
|
|
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
|
|
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
|
|
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
|
|
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
|
|
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
|
|
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
|
|
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
|
|
; GCN-DENORM-DAG: v_rcp_f32_e32
|
|
; GCN-DENORM-DAG: v_rcp_f32_e32
|
|
; GCN-DENORM-DAG: v_rcp_f32_e32
|
|
; GCN-DENORM-DAG: v_rcp_f32_e32
|
|
; GCN-DENORM-DAG: v_mul_f32_e32
|
|
; GCN-DENORM-DAG: v_mul_f32_e32
|
|
; GCN-DENORM-DAG: v_mul_f32_e32
|
|
; GCN-DENORM-DAG: v_mul_f32_e32
|
|
|
|
; GCN-FLUSH: v_rcp_f32_e32 v[[OUT0:[0-9]+]], s[[VAL0]]
|
|
; GCN-FLUSH: v_rcp_f32_e32
|
|
; GCN-FLUSH: v_rcp_f32_e32
|
|
; GCN-FLUSH: v_rcp_f32_e32 v[[OUT3:[0-9]+]], s[[VAL3]]
|
|
; GCN-FLUSH: global_store_dwordx4 v[{{[0-9:]+}}], v{{\[}}[[OUT0]]:[[OUT3]]], off
|
|
; Kernel under test: <4 x float> variant. Loads a vector from %arg, divides
; a splat of 1.0 by it with the division tagged !fpmath !0, and stores the
; result vector back to %arg.
define amdgpu_kernel void @div_v4_1_by_x_25ulp(<4 x float> addrspace(1)* %arg) {
  %vec = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
  %quot = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %vec, !fpmath !0
  store <4 x float> %quot, <4 x float> addrspace(1)* %arg, align 16
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}div_v4_minus_1_by_x_25ulp:
; Capture the loaded scalar register range in this function, so the VAL0 and
; VAL3 uses below do not rely on captures left over from an earlier function.
; GCN-DAG: s_load_dwordx4 s{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32

; GCN-FLUSH: v_rcp_f32_e64 v[[OUT0:[0-9]+]], -s[[VAL0]]
; GCN-FLUSH: v_rcp_f32_e64
; GCN-FLUSH: v_rcp_f32_e64
; GCN-FLUSH: v_rcp_f32_e64 v[[OUT3:[0-9]+]], -s[[VAL3]]

; Kernel under test: <4 x float> variant. Loads a vector from %arg, divides
; a splat of -1.0 by it with the division tagged !fpmath !0, and stores the
; result vector back to %arg.
define amdgpu_kernel void @div_v4_minus_1_by_x_25ulp(<4 x float> addrspace(1)* %arg) {
  %load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
  %div = fdiv <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, %load, !fpmath !0
  store <4 x float> %div, <4 x float> addrspace(1)* %arg, align 16
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}div_v4_1_by_minus_x_25ulp:
; Capture the loaded scalar register range in this function, so the VAL0 and
; VAL3 uses below do not rely on captures left over from an earlier function.
; GCN-DAG: s_load_dwordx4 s{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32

; GCN-FLUSH: v_rcp_f32_e64 v[[OUT0:[0-9]+]], -s[[VAL0]]
; GCN-FLUSH: v_rcp_f32_e64
; GCN-FLUSH: v_rcp_f32_e64
; GCN-FLUSH: v_rcp_f32_e64 v[[OUT3:[0-9]+]], -s[[VAL3]]
; GCN-FLUSH: global_store_dwordx4 v[{{[0-9:]+}}], v{{\[}}[[OUT0]]:[[OUT3]]], off

; Kernel under test: <4 x float> variant. Loads a vector from %arg, negates
; it (written as a splat of -0.0 minus the vector), divides a splat of 1.0 by
; the negated vector with the division tagged !fpmath !0, and stores the
; result vector back to %arg.
define amdgpu_kernel void @div_v4_1_by_minus_x_25ulp(<4 x float> addrspace(1)* %arg) {
  %load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
  %neg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %load
  %div = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %neg, !fpmath !0
  store <4 x float> %div, <4 x float> addrspace(1)* %arg, align 16
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}div_v4_minus_1_by_minus_x_25ulp:
|
|
; GCN-DAG: s_load_dwordx4 s{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
|
|
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
|
|
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
|
|
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
|
|
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
|
|
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
|
|
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
|
|
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
|
|
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
|
|
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
|
|
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
|
|
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
|
|
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
|
|
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
|
|
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
|
|
; GCN-DENORM-DAG: v_rcp_f32_e32
|
|
; GCN-DENORM-DAG: v_rcp_f32_e32
|
|
; GCN-DENORM-DAG: v_rcp_f32_e32
|
|
; GCN-DENORM-DAG: v_rcp_f32_e32
|
|
; GCN-DENORM-DAG: v_mul_f32_e32
|
|
; GCN-DENORM-DAG: v_mul_f32_e32
|
|
; GCN-DENORM-DAG: v_mul_f32_e32
|
|
; GCN-DENORM-DAG: v_mul_f32_e32
|
|
|
|
; GCN-FLUSH: v_rcp_f32_e32 v[[OUT0:[0-9]+]], s[[VAL0]]
|
|
; GCN-FLUSH: v_rcp_f32_e32
|
|
; GCN-FLUSH: v_rcp_f32_e32
|
|
; GCN-FLUSH: v_rcp_f32_e32 v[[OUT3:[0-9]+]], s[[VAL3]]
|
|
; GCN-FLUSH: global_store_dwordx4 v[{{[0-9:]+}}], v{{\[}}[[OUT0]]:[[OUT3]]], off
|
|
; Kernel under test: <4 x float> variant. Loads a vector from %arg, negates
; it (written as a splat of -0.0 minus the vector), divides a splat of -1.0
; by the negated vector with the division tagged !fpmath !0, and stores the
; result vector back to %arg.
define amdgpu_kernel void @div_v4_minus_1_by_minus_x_25ulp(<4 x float> addrspace(1)* %arg) {
  %vec = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
  %negvec = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %vec
  %quot = fdiv <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, %negvec, !fpmath !0
  store <4 x float> %quot, <4 x float> addrspace(1)* %arg, align 16
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}div_v4_c_by_x_25ulp:
|
|
; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, 2.0{{$}}
|
|
; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, 2.0{{$}}
|
|
; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
|
|
; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
|
|
; GCN-DENORM-DAG: v_rcp_f32_e32
|
|
; GCN-DENORM-DAG: v_rcp_f32_e32
|
|
|
|
; GCN-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
|
|
; GCN-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
|
|
|
|
; GCN-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
|
|
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
|
|
; GCN-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
|
|
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
|
|
|
|
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
|
|
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
|
|
; GCN-DENORM-DAG: v_rcp_f32_e32 [[RCP1:v[0-9]+]], v{{[0-9]+}}
|
|
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[RCP1]]
|
|
; GCN-DENORM-DAG: v_rcp_f32_e32 [[RCP2:v[0-9]+]], v{{[0-9]+}}
|
|
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[RCP2]]
|
|
|
|
; GCN-DENORM-DAG: v_div_fmas_f32
|
|
; GCN-DENORM-DAG: v_div_fmas_f32
|
|
; GCN-DENORM-DAG: v_div_fixup_f32 {{.*}}, 2.0{{$}}
|
|
; GCN-DENORM-DAG: v_div_fixup_f32 {{.*}}, -2.0{{$}}
|
|
|
|
; GCN-FLUSH-DAG: v_rcp_f32_e32
|
|
; GCN-FLUSH-DAG: v_rcp_f32_e64
|
|
|
|
; GCN-NOT: v_cmp_gt_f32_e64
|
|
; GCN-NOT: v_cndmask_b32_e32
|
|
; GCN-FLUSH-NOT: v_div
|
|
|
|
; GCN: global_store_dwordx4
|
|
; Kernel under test: <4 x float> variant with a mixed constant numerator.
; Loads a vector from %arg, divides <2.0, 1.0, -1.0, -2.0> by it with the
; division tagged !fpmath !0, and stores the result vector back to %arg.
define amdgpu_kernel void @div_v4_c_by_x_25ulp(<4 x float> addrspace(1)* %arg) {
  %vec = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
  %quot = fdiv <4 x float> <float 2.000000e+00, float 1.000000e+00, float -1.000000e+00, float -2.000000e+00>, %vec, !fpmath !0
  store <4 x float> %quot, <4 x float> addrspace(1)* %arg, align 16
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}div_v4_c_by_minus_x_25ulp:
|
|
; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
|
|
; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
|
|
; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
|
|
; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
|
|
; GCN-DENORM-DAG: v_rcp_f32_e32
|
|
; GCN-DENORM-DAG: v_rcp_f32_e32
|
|
|
|
; GCN-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
|
|
; GCN-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
|
|
|
|
; GCN-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
|
|
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
|
|
; GCN-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
|
|
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
|
|
|
|
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
|
|
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
|
|
; GCN-DENORM-DAG: v_rcp_f32_e32 [[RCP1:v[0-9]+]], v{{[0-9]+}}
|
|
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[RCP1]]
|
|
; GCN-DENORM-DAG: v_rcp_f32_e32 [[RCP2:v[0-9]+]], v{{[0-9]+}}
|
|
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[RCP2]]
|
|
|
|
; GCN-DENORM-DAG: v_div_fmas_f32
|
|
; GCN-DENORM-DAG: v_div_fmas_f32
|
|
; GCN-DENORM-DAG: v_div_fixup_f32 {{.*}}, -2.0{{$}}
|
|
; GCN-DENORM-DAG: v_div_fixup_f32 {{.*}}, -2.0{{$}}
|
|
|
|
; GCN-FLUSH-DAG: v_rcp_f32_e32
|
|
; GCN-FLUSH-DAG: v_rcp_f32_e64
|
|
|
|
; GCN-NOT: v_cmp_gt_f32_e64
|
|
; GCN-NOT: v_cndmask_b32_e32
|
|
; GCN-FLUSH-NOT: v_div
|
|
|
|
; GCN: global_store_dwordx4
|
|
; Kernel under test: <4 x float> variant with a mixed constant numerator and
; a negated denominator. Loads a vector from %arg, negates it (written as a
; splat of -0.0 minus the vector), divides <2.0, 1.0, -1.0, -2.0> by the
; negated vector with the division tagged !fpmath !0, and stores the result
; vector back to %arg.
define amdgpu_kernel void @div_v4_c_by_minus_x_25ulp(<4 x float> addrspace(1)* %arg) {
  %vec = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
  %negvec = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %vec
  %quot = fdiv <4 x float> <float 2.000000e+00, float 1.000000e+00, float -1.000000e+00, float -2.000000e+00>, %negvec, !fpmath !0
  store <4 x float> %quot, <4 x float> addrspace(1)* %arg, align 16
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}div_v_by_x_25ulp:
|
|
; GCN-DAG: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9:]+}}], 0x0{{$}}
|
|
|
|
; GCN-DENORM-DAG: v_div_scale_f32
|
|
; GCN-DENORM-DAG: v_rcp_f32_e32
|
|
; GCN-DENORM-DAG: v_div_scale_f32
|
|
; GCN-DENORM: v_div_fmas_f32
|
|
; GCN-DENORM: v_div_fixup_f32 [[OUT:v[0-9]+]],
|
|
|
|
; GCN-FLUSH-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
|
|
; GCN-FLUSH-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
|
|
; GCN-FLUSH-DAG: v_cmp_gt_f32_e64 vcc, |[[VAL]]|, [[L]]
|
|
; GCN-FLUSH-DAG: v_cndmask_b32_e32 [[SCALE:v[0-9]+]], 1.0, [[S]], vcc
|
|
; GCN-FLUSH: v_mul_f32_e32 [[PRESCALED:v[0-9]+]], [[VAL]], [[SCALE]]
|
|
; GCN-FLUSH: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[PRESCALED]]
|
|
; GCN-FLUSH: v_mul_f32_e32 [[OUT:v[0-9]+]], [[SCALE]], [[RCP]]
|
|
|
|
; GCN: global_store_dword v[{{[0-9:]+}}], [[OUT]], off
|
|
; Kernel under test: non-constant numerator. Loads a float from %arg, divides
; the kernel argument %num by it with the division tagged !fpmath !0, and
; stores the quotient back to %arg.
define amdgpu_kernel void @div_v_by_x_25ulp(float addrspace(1)* %arg, float %num) {
  %x = load float, float addrspace(1)* %arg, align 4
  %quot = fdiv float %num, %x, !fpmath !0
  store float %quot, float addrspace(1)* %arg, align 4
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}div_1_by_x_fast:
|
|
; GCN: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
|
|
; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[VAL]]
|
|
; GCN: global_store_dword v[{{[0-9:]+}}], [[RCP]], off
|
|
; Kernel under test: loads a float from %arg, computes 1.0 / x using an fdiv
; that carries the `fast` flag (no !fpmath attachment), and stores the
; quotient back to %arg.
define amdgpu_kernel void @div_1_by_x_fast(float addrspace(1)* %arg) {
  %x = load float, float addrspace(1)* %arg, align 4
  %recip = fdiv fast float 1.000000e+00, %x
  store float %recip, float addrspace(1)* %arg, align 4
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}div_minus_1_by_x_fast:
|
|
; GCN: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
|
|
; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], -[[VAL]]
|
|
; GCN: global_store_dword v[{{[0-9:]+}}], [[RCP]], off
|
|
; Kernel under test: loads a float from %arg, computes -1.0 / x using an fdiv
; that carries the `fast` flag (no !fpmath attachment), and stores the
; quotient back to %arg.
define amdgpu_kernel void @div_minus_1_by_x_fast(float addrspace(1)* %arg) {
  %x = load float, float addrspace(1)* %arg, align 4
  %quot = fdiv fast float -1.000000e+00, %x
  store float %quot, float addrspace(1)* %arg, align 4
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}div_1_by_minus_x_fast:
|
|
; GCN: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
|
|
; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], -[[VAL]]
|
|
; GCN: global_store_dword v[{{[0-9:]+}}], [[RCP]], off
|
|
; Kernel under test: loads a float from %arg, negates it (written as
; -0.0 - x), computes 1.0 / (-x) using an fdiv that carries the `fast` flag
; (no !fpmath attachment), and stores the quotient back to %arg.
define amdgpu_kernel void @div_1_by_minus_x_fast(float addrspace(1)* %arg) {
  %x = load float, float addrspace(1)* %arg, align 4
  %negx = fsub float -0.000000e+00, %x
  %quot = fdiv fast float 1.000000e+00, %negx
  store float %quot, float addrspace(1)* %arg, align 4
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}div_minus_1_by_minus_x_fast:
|
|
; GCN: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
|
|
; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[VAL]]
|
|
; GCN: global_store_dword v[{{[0-9:]+}}], [[RCP]], off
|
|
; Kernel under test: loads a float from %arg, negates it (written as
; -0.0 - x), computes -1.0 / (-x) using an fdiv that carries the `fast` flag
; (no !fpmath attachment), and stores the quotient back to %arg.
define amdgpu_kernel void @div_minus_1_by_minus_x_fast(float addrspace(1)* %arg) {
  %x = load float, float addrspace(1)* %arg, align 4
  %negx = fsub float -0.000000e+00, %x
  %quot = fdiv fast float -1.000000e+00, %negx
  store float %quot, float addrspace(1)* %arg, align 4
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}div_1_by_x_correctly_rounded:
|
|
; GCN-DENORM-DAG: v_div_scale_f32
|
|
; GCN-DENORM-DAG: v_rcp_f32_e32
|
|
; GCN-DENORM-DAG: v_div_scale_f32
|
|
; GCN-DENORM: v_div_fmas_f32
|
|
; GCN-DENORM: v_div_fixup_f32
|
|
|
|
; GCN-FLUSH: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
|
|
; GCN-FLUSH: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[VAL]]
|
|
; GCN-FLUSH: global_store_dword v[{{[0-9:]+}}], [[RCP]], off
|
|
; Kernel under test: loads a float from %arg, computes 1.0 / x using a plain
; fdiv (no fast-math flags and no !fpmath attachment), and stores the quotient
; back to %arg.
define amdgpu_kernel void @div_1_by_x_correctly_rounded(float addrspace(1)* %arg) {
  %x = load float, float addrspace(1)* %arg, align 4
  %recip = fdiv float 1.000000e+00, %x
  store float %recip, float addrspace(1)* %arg, align 4
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}div_minus_1_by_x_correctly_rounded:
|
|
; GCN-DENORM-DAG: v_div_scale_f32
|
|
; GCN-DENORM-DAG: v_rcp_f32_e32
|
|
; GCN-DENORM-DAG: v_div_scale_f32
|
|
; GCN-DENORM: v_div_fmas_f32
|
|
; GCN-DENORM: v_div_fixup_f32
|
|
|
|
; GCN-FLUSH: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
|
|
; GCN-FLUSH: v_rcp_f32_e64 [[RCP:v[0-9]+]], -[[VAL]]
|
|
; GCN-FLUSH: global_store_dword v[{{[0-9:]+}}], [[RCP]], off
|
|
; Kernel under test: loads a float from %arg, computes -1.0 / x using a plain
; fdiv (no fast-math flags and no !fpmath attachment), and stores the quotient
; back to %arg.
define amdgpu_kernel void @div_minus_1_by_x_correctly_rounded(float addrspace(1)* %arg) {
  %x = load float, float addrspace(1)* %arg, align 4
  %quot = fdiv float -1.000000e+00, %x
  store float %quot, float addrspace(1)* %arg, align 4
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}div_1_by_minus_x_correctly_rounded:
|
|
; GCN-DENORM-DAG: v_div_scale_f32
|
|
; GCN-DENORM-DAG: v_rcp_f32_e32
|
|
; GCN-DENORM-DAG: v_div_scale_f32
|
|
; GCN-DENORM: v_div_fmas_f32
|
|
; GCN-DENORM: v_div_fixup_f32
|
|
|
|
; GCN-FLUSH: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
|
|
; GCN-FLUSH: v_rcp_f32_e64 [[RCP:v[0-9]+]], -[[VAL]]
|
|
; GCN-FLUSH: global_store_dword v[{{[0-9:]+}}], [[RCP]], off
|
|
; Kernel under test: loads a float from %arg, negates it (written as
; -0.0 - x), computes 1.0 / (-x) using a plain fdiv (no fast-math flags and
; no !fpmath attachment), and stores the quotient back to %arg.
define amdgpu_kernel void @div_1_by_minus_x_correctly_rounded(float addrspace(1)* %arg) {
  %x = load float, float addrspace(1)* %arg, align 4
  %negx = fsub float -0.000000e+00, %x
  %quot = fdiv float 1.000000e+00, %negx
  store float %quot, float addrspace(1)* %arg, align 4
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}div_minus_1_by_minus_x_correctly_rounded:
|
|
; GCN-DENORM-DAG: v_div_scale_f32
|
|
; GCN-DENORM-DAG: v_rcp_f32_e32
|
|
; GCN-DENORM-DAG: v_div_scale_f32
|
|
; GCN-DENORM: v_div_fmas_f32
|
|
; GCN-DENORM: v_div_fixup_f32
|
|
|
|
; GCN-FLUSH: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
|
|
; GCN-FLUSH: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[VAL]]
|
|
; GCN-FLUSH: global_store_dword v[{{[0-9:]+}}], [[RCP]], off
|
|
; Kernel under test: loads a float from %arg, negates it (written as
; -0.0 - x), computes -1.0 / (-x) using a plain fdiv (no fast-math flags and
; no !fpmath attachment), and stores the quotient back to %arg.
define amdgpu_kernel void @div_minus_1_by_minus_x_correctly_rounded(float addrspace(1)* %arg) {
  %x = load float, float addrspace(1)* %arg, align 4
  %negx = fsub float -0.000000e+00, %x
  %quot = fdiv float -1.000000e+00, %negx
  store float %quot, float addrspace(1)* %arg, align 4
  ret void
}
|
|
|
|
; Metadata node referenced by the `!fpmath !0` attachments on the fdiv
; instructions in the *_25ulp kernels above. Its single operand is the float
; 2.5, the accuracy bound those divisions are allowed (in ULPs, per the LLVM
; LangRef description of fpmath metadata).
!0 = !{float 2.500000e+00}
|