mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-24 13:33:37 +02:00
c230fcbb58
I think this is safe as long as no inputs are known to ever be nans. Also add an intrinsic for fmed3 to be able to handle all safe math cases. llvm-svn: 293598
897 lines
44 KiB
LLVM
897 lines
44 KiB
LLVM
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=NOSNAN -check-prefix=GCN %s
|
|
; RUN: llc -march=amdgcn -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -check-prefix=SNAN -check-prefix=GCN %s
|
|
|
|
; The value being clamped comes from an 'fadd nnan', and both llc
; configurations are checked for a single v_med3_f32 fed by that add, with the
; immediates 2.0 and 4.0 as the other two operands.
; GCN-LABEL: {{^}}v_test_nnan_input_fmed3_r_i_i_f32:
; GCN: v_add_f32_e32 [[ADD:v[0-9]+]], 1.0, v{{[0-9]+}}
; GCN: v_med3_f32 v{{[0-9]+}}, [[ADD]], 2.0, 4.0
define void @v_test_nnan_input_fmed3_r_i_i_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
  ; Each work item indexes its own element of the input and output arrays.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  %a = load float, float addrspace(1)* %a.ptr
  ; The nnan flag on this add is the only no-NaN information in the function.
  %a.plus1 = fadd nnan float %a, 1.0
  %lower = call float @llvm.maxnum.f32(float %a.plus1, float 2.0)
  %clamped = call float @llvm.minnum.f32(float %lower, float 4.0)

  store float %clamped, float addrspace(1)* %out.ptr
  ret void
}
|
|
|
|
; Clamps a loaded float with maxnum(x, 2.0) followed by minnum(.., 4.0); no
; nnan flag appears on the input. The default llc run is checked for a single
; v_med3_f32, the -mattr=+fp-exceptions run for a separate max / min pair.
; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_f32:
; NOSNAN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0

; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
define void @v_test_fmed3_r_i_i_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  %a = load float, float addrspace(1)* %a.ptr

  ; Variable operand first in both intrinsic calls.
  %lower = call float @llvm.maxnum.f32(float %a, float 2.0)
  %clamped = call float @llvm.minnum.f32(float %lower, float 4.0)

  store float %clamped, float addrspace(1)* %out.ptr
  ret void
}
|
|
|
|
; Same 2.0 / 4.0 clamp as the plain r_i_i test, but with the constant written
; as the first operand of both the maxnum and the minnum call. The expected
; instructions are the same as for the uncommuted form.
; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_commute0_f32:
; NOSNAN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0

; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
define void @v_test_fmed3_r_i_i_commute0_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  %a = load float, float addrspace(1)* %a.ptr

  ; Constant operand first in both calls.
  %lower = call float @llvm.maxnum.f32(float 2.0, float %a)
  %clamped = call float @llvm.minnum.f32(float 4.0, float %lower)

  store float %clamped, float addrspace(1)* %out.ptr
  ret void
}
|
|
|
|
; Same 2.0 / 4.0 clamp as the plain r_i_i test, with only the outer minnum
; commuted (constant first); the maxnum keeps the variable operand first.
; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_commute1_f32:
; NOSNAN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0

; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
define void @v_test_fmed3_r_i_i_commute1_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  %a = load float, float addrspace(1)* %a.ptr

  %lower = call float @llvm.maxnum.f32(float %a, float 2.0)
  ; Only this call has the constant in the first position.
  %clamped = call float @llvm.minnum.f32(float 4.0, float %lower)

  store float %clamped, float addrspace(1)* %out.ptr
  ret void
}
|
|
|
|
; The two constants are swapped relative to the other r_i_i tests: the maxnum
; is against 4.0 and the minnum against 2.0. Both llc configurations are
; checked for a separate v_max_f32 / v_min_f32 pair; no med3 is expected.
; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_constant_order_f32:
; GCN: v_max_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
; GCN: v_min_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
define void @v_test_fmed3_r_i_i_constant_order_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  %a = load float, float addrspace(1)* %a.ptr

  ; Larger constant in the max, smaller constant in the min.
  %raised = call float @llvm.maxnum.f32(float %a, float 4.0)
  %capped = call float @llvm.minnum.f32(float %raised, float 2.0)

  store float %capped, float addrspace(1)* %out.ptr
  ret void
}
|
|
|
|
|
|
; The intermediate maxnum result has a second use (it is stored as well as
; being fed to the minnum). Both llc configurations are checked for a separate
; v_max_f32 / v_min_f32 pair.
; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_multi_use_f32:
; GCN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
; GCN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
define void @v_test_fmed3_r_i_i_multi_use_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  %a = load float, float addrspace(1)* %a.ptr

  %lower = call float @llvm.maxnum.f32(float %a, float 2.0)
  %clamped = call float @llvm.minnum.f32(float %lower, float 4.0)

  ; Both stores are volatile and target the same address; the second one is
  ; the extra use of the max result.
  store volatile float %clamped, float addrspace(1)* %out.ptr
  store volatile float %lower, float addrspace(1)* %out.ptr
  ret void
}
|
|
|
|
; Double-precision version of the 2.0 / 4.0 clamp. Both llc configurations
; are checked for v_max_f64 followed by v_min_f64 on 64-bit register pairs.
; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_f64:
; GCN: v_max_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, 2.0
; GCN: v_min_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, 4.0
define void @v_test_fmed3_r_i_i_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.ptr = getelementptr double, double addrspace(1)* %aptr, i32 %lane
  %out.ptr = getelementptr double, double addrspace(1)* %out, i32 %lane
  %a = load double, double addrspace(1)* %a.ptr

  %lower = call double @llvm.maxnum.f64(double %a, double 2.0)
  %clamped = call double @llvm.minnum.f64(double %lower, double 4.0)

  store double %clamped, double addrspace(1)* %out.ptr
  ret void
}
|
|
|
|
; Same IR as the plain r_i_i clamp but compiled with attribute group #2
; instead of #1. Both llc configurations are checked for v_med3_f32.
; NOTE(review): #2 is defined outside this part of the file; presumably it
; enables no-NaNs FP math - confirm against the attribute definitions.
; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_no_nans_f32:
; GCN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
define void @v_test_fmed3_r_i_i_no_nans_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  %a = load float, float addrspace(1)* %a.ptr

  %lower = call float @llvm.maxnum.f32(float %a, float 2.0)
  %clamped = call float @llvm.minnum.f32(float %lower, float 4.0)

  store float %clamped, float addrspace(1)* %out.ptr
  ret void
}
|
|
|
|
; The 2.0 / 4.0 clamp written as fcmp + select pairs rather than with the
; maxnum / minnum intrinsics. The expected instructions match the intrinsic
; form: med3 in the default run, max + min with -mattr=+fp-exceptions.
; GCN-LABEL: {{^}}v_test_legacy_fmed3_r_i_i_f32:
; NOSNAN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0

; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
define void @v_test_legacy_fmed3_r_i_i_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  %a = load float, float addrspace(1)* %a.ptr

  ; fmax_legacy
  ; Unordered-or-less-equal compare: selects 2.0 when %a <= 2.0 or unordered.
  %is.ule.2 = fcmp ule float %a, 2.0
  %lower = select i1 %is.ule.2, float 2.0, float %a

  ; fmin_legacy
  ; Unordered-or-greater-equal compare: selects 4.0 when the value >= 4.0 or
  ; unordered.
  %is.uge.4 = fcmp uge float %lower, 4.0
  %clamped = select i1 %is.uge.4, float 4.0, float %lower

  store float %clamped, float addrspace(1)* %out.ptr
  ret void
}
|
|
|
|
; Three-input median pattern max(min(x, y), min(max(x, y), z)) where x is the
; negation of the first loaded value. The negation is expected to appear as a
; source modifier on the first v_med3_f32 operand.
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod0:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, -[[A]], [[B]], [[C]]
define void @v_test_global_nnans_med3_f32_pat0_srcmod0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %b.ptr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
  %c.ptr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  ; Volatile loads: the check lines above bind A, B, C in this order.
  %a = load volatile float, float addrspace(1)* %a.ptr
  %b = load volatile float, float addrspace(1)* %b.ptr
  %c = load volatile float, float addrspace(1)* %c.ptr
  ; fneg written as a subtraction from -0.0.
  %a.neg = fsub float -0.0, %a
  %lo = call float @llvm.minnum.f32(float %a.neg, float %b)
  %hi = call float @llvm.maxnum.f32(float %a.neg, float %b)
  %hi.capped = call float @llvm.minnum.f32(float %hi, float %c)
  %median = call float @llvm.maxnum.f32(float %lo, float %hi.capped)
  store float %median, float addrspace(1)* %out.ptr
  ret void
}
|
|
|
|
; Three-input median pattern max(min(x, y), min(max(x, y), z)) where y is the
; negation of the second loaded value. The negation is expected to appear as
; a source modifier on the second v_med3_f32 operand.
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod1:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], -[[B]], [[C]]
define void @v_test_global_nnans_med3_f32_pat0_srcmod1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %b.ptr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
  %c.ptr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  ; Volatile loads: the check lines above bind A, B, C in this order.
  %a = load volatile float, float addrspace(1)* %a.ptr
  %b = load volatile float, float addrspace(1)* %b.ptr
  %c = load volatile float, float addrspace(1)* %c.ptr
  ; fneg written as a subtraction from -0.0.
  %b.neg = fsub float -0.0, %b
  %lo = call float @llvm.minnum.f32(float %a, float %b.neg)
  %hi = call float @llvm.maxnum.f32(float %a, float %b.neg)
  %hi.capped = call float @llvm.minnum.f32(float %hi, float %c)
  %median = call float @llvm.maxnum.f32(float %lo, float %hi.capped)
  store float %median, float addrspace(1)* %out.ptr
  ret void
}
|
|
|
|
; Three-input median pattern max(min(x, y), min(max(x, y), z)) where z is the
; negation of the third loaded value. The negation is expected to appear as a
; source modifier on the third v_med3_f32 operand.
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod2:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], -[[C]]
define void @v_test_global_nnans_med3_f32_pat0_srcmod2(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %b.ptr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
  %c.ptr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  ; Volatile loads: the check lines above bind A, B, C in this order.
  %a = load volatile float, float addrspace(1)* %a.ptr
  %b = load volatile float, float addrspace(1)* %b.ptr
  %c = load volatile float, float addrspace(1)* %c.ptr
  ; fneg written as a subtraction from -0.0.
  %c.neg = fsub float -0.0, %c
  %lo = call float @llvm.minnum.f32(float %a, float %b)
  %hi = call float @llvm.maxnum.f32(float %a, float %b)
  %hi.capped = call float @llvm.minnum.f32(float %hi, float %c.neg)
  %median = call float @llvm.maxnum.f32(float %lo, float %hi.capped)
  store float %median, float addrspace(1)* %out.ptr
  ret void
}
|
|
|
|
; Median pattern with a different modifier on each input: x = -a, y = |b|,
; z = -|c|. All three are expected to be folded into the v_med3_f32 operands
; as source modifiers.
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod012:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, -[[A]], |[[B]]|, -|[[C]]|
define void @v_test_global_nnans_med3_f32_pat0_srcmod012(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %b.ptr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
  %c.ptr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  ; Volatile loads: the check lines above bind A, B, C in this order.
  %a = load volatile float, float addrspace(1)* %a.ptr
  %b = load volatile float, float addrspace(1)* %b.ptr
  %c = load volatile float, float addrspace(1)* %c.ptr

  ; Negation is written as a subtraction from -0.0.
  %a.neg = fsub float -0.0, %a
  %b.abs = call float @llvm.fabs.f32(float %b)
  %c.abs = call float @llvm.fabs.f32(float %c)
  %c.abs.neg = fsub float -0.0, %c.abs

  %lo = call float @llvm.minnum.f32(float %a.neg, float %b.abs)
  %hi = call float @llvm.maxnum.f32(float %a.neg, float %b.abs)
  %hi.capped = call float @llvm.minnum.f32(float %hi, float %c.abs.neg)
  %median = call float @llvm.maxnum.f32(float %lo, float %hi.capped)

  store float %median, float addrspace(1)* %out.ptr
  ret void
}
|
|
|
|
; Median pattern where every input is -|value|. All three neg+abs pairs are
; expected to be folded into the v_med3_f32 operands as source modifiers.
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_negabs012:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, -|[[A]]|, -|[[B]]|, -|[[C]]|
define void @v_test_global_nnans_med3_f32_pat0_negabs012(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %b.ptr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
  %c.ptr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  ; Volatile loads: the check lines above bind A, B, C in this order.
  %a = load volatile float, float addrspace(1)* %a.ptr
  %b = load volatile float, float addrspace(1)* %b.ptr
  %c = load volatile float, float addrspace(1)* %c.ptr

  ; fabs followed by a subtraction from -0.0 for each input.
  %a.abs = call float @llvm.fabs.f32(float %a)
  %a.abs.neg = fsub float -0.0, %a.abs
  %b.abs = call float @llvm.fabs.f32(float %b)
  %b.abs.neg = fsub float -0.0, %b.abs
  %c.abs = call float @llvm.fabs.f32(float %c)
  %c.abs.neg = fsub float -0.0, %c.abs

  %lo = call float @llvm.minnum.f32(float %a.abs.neg, float %b.abs.neg)
  %hi = call float @llvm.maxnum.f32(float %a.abs.neg, float %b.abs.neg)
  %hi.capped = call float @llvm.minnum.f32(float %hi, float %c.abs.neg)
  %median = call float @llvm.maxnum.f32(float %lo, float %hi.capped)

  store float %median, float addrspace(1)* %out.ptr
  ret void
}
|
|
|
|
; Median pattern compiled with attribute group #1, where each of the three
; inputs is produced by an 'fadd nnan'. Both llc configurations are checked
; for a single v_med3_f32 consuming the three add results.
; GCN-LABEL: {{^}}v_nnan_inputs_med3_f32_pat0:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN-DAG: v_add_f32_e32 [[A_ADD:v[0-9]+]], 1.0, [[A]]
; GCN-DAG: v_add_f32_e32 [[B_ADD:v[0-9]+]], 2.0, [[B]]
; GCN-DAG: v_add_f32_e32 [[C_ADD:v[0-9]+]], 4.0, [[C]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A_ADD]], [[B_ADD]], [[C_ADD]]
define void @v_nnan_inputs_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %b.ptr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
  %c.ptr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  ; Volatile loads: the check lines above bind A, B, C in this order.
  %a = load volatile float, float addrspace(1)* %a.ptr
  %b = load volatile float, float addrspace(1)* %b.ptr
  %c = load volatile float, float addrspace(1)* %c.ptr

  ; The nnan flags on these adds are the only no-NaN information in the body.
  %a.plus1 = fadd nnan float %a, 1.0
  %b.plus2 = fadd nnan float %b, 2.0
  %c.plus4 = fadd nnan float %c, 4.0

  %lo = call float @llvm.minnum.f32(float %a.plus1, float %b.plus2)
  %hi = call float @llvm.maxnum.f32(float %a.plus1, float %b.plus2)
  %hi.capped = call float @llvm.minnum.f32(float %hi, float %c.plus4)
  %median = call float @llvm.maxnum.f32(float %lo, float %hi.capped)
  store float %median, float addrspace(1)* %out.ptr
  ret void
}
|
|
|
|
; 16 combinations of the median pattern are tested: the 8 operand orderings
; listed below, plus the same 8 again with the two operands of the outermost
; max commuted.

; 0: max(min(x, y), min(max(x, y), z))
; 1: max(min(x, y), min(max(y, x), z))
; 2: max(min(x, y), min(z, max(x, y)))
; 3: max(min(x, y), min(z, max(y, x)))
; 4: max(min(y, x), min(max(x, y), z))
; 5: max(min(y, x), min(max(y, x), z))
; 6: max(min(y, x), min(z, max(x, y)))
; 7: max(min(y, x), min(z, max(y, x)))
|
|
|
|
; Operand order under test: max(min(a, b), min(max(a, b), c)).
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
define void @v_test_global_nnans_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %b.ptr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
  %c.ptr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  ; Volatile loads: the check lines above bind A, B, C in this order.
  %a = load volatile float, float addrspace(1)* %a.ptr
  %b = load volatile float, float addrspace(1)* %b.ptr
  %c = load volatile float, float addrspace(1)* %c.ptr
  %min.ab = call float @llvm.minnum.f32(float %a, float %b)
  %max.ab = call float @llvm.maxnum.f32(float %a, float %b)
  %upper = call float @llvm.minnum.f32(float %max.ab, float %c)
  %median = call float @llvm.maxnum.f32(float %min.ab, float %upper)
  store float %median, float addrspace(1)* %out.ptr
  ret void
}
|
|
|
|
; Operand order under test: max(min(a, b), min(max(b, a), c)).
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat1:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
define void @v_test_global_nnans_med3_f32_pat1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %b.ptr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
  %c.ptr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  ; Volatile loads: the check lines above bind A, B, C in this order.
  %a = load volatile float, float addrspace(1)* %a.ptr
  %b = load volatile float, float addrspace(1)* %b.ptr
  %c = load volatile float, float addrspace(1)* %c.ptr
  %min.ab = call float @llvm.minnum.f32(float %a, float %b)
  %max.ba = call float @llvm.maxnum.f32(float %b, float %a)
  %upper = call float @llvm.minnum.f32(float %max.ba, float %c)
  %median = call float @llvm.maxnum.f32(float %min.ab, float %upper)
  store float %median, float addrspace(1)* %out.ptr
  ret void
}
|
|
|
|
; Operand order under test: max(min(a, b), min(c, max(a, b))).
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat2:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
define void @v_test_global_nnans_med3_f32_pat2(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %b.ptr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
  %c.ptr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  ; Volatile loads: the check lines above bind A, B, C in this order.
  %a = load volatile float, float addrspace(1)* %a.ptr
  %b = load volatile float, float addrspace(1)* %b.ptr
  %c = load volatile float, float addrspace(1)* %c.ptr
  %min.ab = call float @llvm.minnum.f32(float %a, float %b)
  %max.ab = call float @llvm.maxnum.f32(float %a, float %b)
  %upper = call float @llvm.minnum.f32(float %c, float %max.ab)
  %median = call float @llvm.maxnum.f32(float %min.ab, float %upper)
  store float %median, float addrspace(1)* %out.ptr
  ret void
}
|
|
|
|
; Operand order under test: max(min(a, b), min(c, max(b, a))).
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat3:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
define void @v_test_global_nnans_med3_f32_pat3(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %b.ptr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
  %c.ptr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  ; Volatile loads: the check lines above bind A, B, C in this order.
  %a = load volatile float, float addrspace(1)* %a.ptr
  %b = load volatile float, float addrspace(1)* %b.ptr
  %c = load volatile float, float addrspace(1)* %c.ptr
  %min.ab = call float @llvm.minnum.f32(float %a, float %b)
  %max.ba = call float @llvm.maxnum.f32(float %b, float %a)
  %upper = call float @llvm.minnum.f32(float %c, float %max.ba)
  %median = call float @llvm.maxnum.f32(float %min.ab, float %upper)
  store float %median, float addrspace(1)* %out.ptr
  ret void
}
|
|
|
|
; Operand order under test: max(min(b, a), min(max(a, b), c)), i.e.
; combination 4 of the documented list, max(min(y, x), min(max(x, y), z)).
; The body previously used max(b, a) and min(c, ..), which made it an exact
; duplicate of the pat7 test and left this ordering uncovered.
; The expected med3 operand order is unchanged (first min operand first), the
; same as the pat6 check, which differs only in the inner min operand order.
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat4:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
define void @v_test_global_nnans_med3_f32_pat4(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %b.ptr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
  %c.ptr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  ; Volatile loads: the check lines above bind A, B, C in this order.
  %a = load volatile float, float addrspace(1)* %a.ptr
  %b = load volatile float, float addrspace(1)* %b.ptr
  %c = load volatile float, float addrspace(1)* %c.ptr
  %min.ba = call float @llvm.minnum.f32(float %b, float %a)
  %max.ab = call float @llvm.maxnum.f32(float %a, float %b)
  %upper = call float @llvm.minnum.f32(float %max.ab, float %c)
  %median = call float @llvm.maxnum.f32(float %min.ba, float %upper)
  store float %median, float addrspace(1)* %out.ptr
  ret void
}
|
|
|
|
; Operand order under test: max(min(b, a), min(max(b, a), c)).
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat5:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
define void @v_test_global_nnans_med3_f32_pat5(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %b.ptr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
  %c.ptr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  ; Volatile loads: the check lines above bind A, B, C in this order.
  %a = load volatile float, float addrspace(1)* %a.ptr
  %b = load volatile float, float addrspace(1)* %b.ptr
  %c = load volatile float, float addrspace(1)* %c.ptr
  %min.ba = call float @llvm.minnum.f32(float %b, float %a)
  %max.ba = call float @llvm.maxnum.f32(float %b, float %a)
  %upper = call float @llvm.minnum.f32(float %max.ba, float %c)
  %median = call float @llvm.maxnum.f32(float %min.ba, float %upper)
  store float %median, float addrspace(1)* %out.ptr
  ret void
}
|
|
|
|
; Operand order under test: max(min(b, a), min(c, max(a, b))).
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat6:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
define void @v_test_global_nnans_med3_f32_pat6(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %b.ptr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
  %c.ptr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  ; Volatile loads: the check lines above bind A, B, C in this order.
  %a = load volatile float, float addrspace(1)* %a.ptr
  %b = load volatile float, float addrspace(1)* %b.ptr
  %c = load volatile float, float addrspace(1)* %c.ptr
  %min.ba = call float @llvm.minnum.f32(float %b, float %a)
  %max.ab = call float @llvm.maxnum.f32(float %a, float %b)
  %upper = call float @llvm.minnum.f32(float %c, float %max.ab)
  %median = call float @llvm.maxnum.f32(float %min.ba, float %upper)
  store float %median, float addrspace(1)* %out.ptr
  ret void
}
|
|
|
|
; Operand order under test: max(min(b, a), min(c, max(b, a))).
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat7:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
define void @v_test_global_nnans_med3_f32_pat7(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %b.ptr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
  %c.ptr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  ; Volatile loads: the check lines above bind A, B, C in this order.
  %a = load volatile float, float addrspace(1)* %a.ptr
  %b = load volatile float, float addrspace(1)* %b.ptr
  %c = load volatile float, float addrspace(1)* %c.ptr
  %min.ba = call float @llvm.minnum.f32(float %b, float %a)
  %max.ba = call float @llvm.maxnum.f32(float %b, float %a)
  %upper = call float @llvm.minnum.f32(float %c, float %max.ba)
  %median = call float @llvm.maxnum.f32(float %min.ba, float %upper)
  store float %median, float addrspace(1)* %out.ptr
  ret void
}
|
|
|
|
; Operand order under test: max(min(max(a, b), c), min(a, b)) - the outermost
; max takes the capped upper value first and the pairwise min second.
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat8:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
define void @v_test_global_nnans_med3_f32_pat8(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %b.ptr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
  %c.ptr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  ; Volatile loads: the check lines above bind A, B, C in this order.
  %a = load volatile float, float addrspace(1)* %a.ptr
  %b = load volatile float, float addrspace(1)* %b.ptr
  %c = load volatile float, float addrspace(1)* %c.ptr
  %min.ab = call float @llvm.minnum.f32(float %a, float %b)
  %max.ab = call float @llvm.maxnum.f32(float %a, float %b)
  %upper = call float @llvm.minnum.f32(float %max.ab, float %c)
  %median = call float @llvm.maxnum.f32(float %upper, float %min.ab)
  store float %median, float addrspace(1)* %out.ptr
  ret void
}
|
|
|
|
; Operand order under test: max(min(max(b, a), c), min(a, b)) - outermost max
; commuted, inner max with b first. The check expects B before A.
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat9:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
define void @v_test_global_nnans_med3_f32_pat9(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %b.ptr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
  %c.ptr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  ; Volatile loads: the check lines above bind A, B, C in this order.
  %a = load volatile float, float addrspace(1)* %a.ptr
  %b = load volatile float, float addrspace(1)* %b.ptr
  %c = load volatile float, float addrspace(1)* %c.ptr
  %min.ab = call float @llvm.minnum.f32(float %a, float %b)
  %max.ba = call float @llvm.maxnum.f32(float %b, float %a)
  %upper = call float @llvm.minnum.f32(float %max.ba, float %c)
  %median = call float @llvm.maxnum.f32(float %upper, float %min.ab)
  store float %median, float addrspace(1)* %out.ptr
  ret void
}
|
|
|
|
; Operand order under test: max(min(c, max(a, b)), min(a, b)) - outermost max
; commuted, inner min with c first.
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat10:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
define void @v_test_global_nnans_med3_f32_pat10(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %b.ptr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
  %c.ptr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  ; Volatile loads: the check lines above bind A, B, C in this order.
  %a = load volatile float, float addrspace(1)* %a.ptr
  %b = load volatile float, float addrspace(1)* %b.ptr
  %c = load volatile float, float addrspace(1)* %c.ptr
  %min.ab = call float @llvm.minnum.f32(float %a, float %b)
  %max.ab = call float @llvm.maxnum.f32(float %a, float %b)
  %upper = call float @llvm.minnum.f32(float %c, float %max.ab)
  %median = call float @llvm.maxnum.f32(float %upper, float %min.ab)
  store float %median, float addrspace(1)* %out.ptr
  ret void
}
|
|
|
|
; Operand order under test: max(min(c, max(b, a)), min(a, b)) - outermost max
; commuted, inner max with b first. The check expects B before A.
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat11:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
define void @v_test_global_nnans_med3_f32_pat11(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %b.ptr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
  %c.ptr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  ; Volatile loads: the check lines above bind A, B, C in this order.
  %a = load volatile float, float addrspace(1)* %a.ptr
  %b = load volatile float, float addrspace(1)* %b.ptr
  %c = load volatile float, float addrspace(1)* %c.ptr
  %min.ab = call float @llvm.minnum.f32(float %a, float %b)
  %max.ba = call float @llvm.maxnum.f32(float %b, float %a)
  %upper = call float @llvm.minnum.f32(float %c, float %max.ba)
  %median = call float @llvm.maxnum.f32(float %upper, float %min.ab)
  store float %median, float addrspace(1)* %out.ptr
  ret void
}
|
|
|
|
; Operand order under test: max(min(max(a, b), c), min(b, a)), i.e.
; combination 4 of the documented list with the outermost max commuted.
; The body previously used max(b, a) and min(c, ..), which is combination 7
; with the outermost max commuted and left this ordering uncovered.
; NOTE(review): the expected med3 operand order is now A, B, C. This was
; derived from the pat9 check (same shape with a and b exchanged, where the
; result follows the inner max operand order) - confirm by running llc.
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat12:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
define void @v_test_global_nnans_med3_f32_pat12(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.ptr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %b.ptr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
  %c.ptr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
  %out.ptr = getelementptr float, float addrspace(1)* %out, i32 %lane
  ; Volatile loads: the check lines above bind A, B, C in this order.
  %a = load volatile float, float addrspace(1)* %a.ptr
  %b = load volatile float, float addrspace(1)* %b.ptr
  %c = load volatile float, float addrspace(1)* %c.ptr
  %min.ba = call float @llvm.minnum.f32(float %b, float %a)
  %max.ab = call float @llvm.maxnum.f32(float %a, float %b)
  %upper = call float @llvm.minnum.f32(float %max.ab, float %c)
  %median = call float @llvm.maxnum.f32(float %upper, float %min.ba)
  store float %median, float addrspace(1)* %out.ptr
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat13:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]

; Shape under test: maxnum(minnum(maxnum(b, a), c), minnum(b, a)).
; The function uses attribute group #2, and the checks above expect the whole
; expression to be selected as a single v_med3_f32.
define void @v_test_global_nnans_med3_f32_pat13(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  ; Index every buffer with the workitem id.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.addr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %b.addr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
  %c.addr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
  %out.addr = getelementptr float, float addrspace(1)* %out, i32 %lane

  ; Volatile loads in a, b, c order; the load checks bind A, B, C in that order.
  %a = load volatile float, float addrspace(1)* %a.addr
  %b = load volatile float, float addrspace(1)* %b.addr
  %c = load volatile float, float addrspace(1)* %c.addr

  %lo = call float @llvm.minnum.f32(float %b, float %a)
  %hi = call float @llvm.maxnum.f32(float %b, float %a)
  %hi.capped = call float @llvm.minnum.f32(float %hi, float %c)
  %median = call float @llvm.maxnum.f32(float %hi.capped, float %lo)
  store float %median, float addrspace(1)* %out.addr
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat14:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]

; Shape under test: maxnum(minnum(c, maxnum(a, b)), minnum(b, a)).
; The function uses attribute group #2, and the checks above expect the whole
; expression to be selected as a single v_med3_f32.
define void @v_test_global_nnans_med3_f32_pat14(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  ; Index every buffer with the workitem id.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.addr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %b.addr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
  %c.addr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
  %out.addr = getelementptr float, float addrspace(1)* %out, i32 %lane

  ; Volatile loads in a, b, c order; the load checks bind A, B, C in that order.
  %a = load volatile float, float addrspace(1)* %a.addr
  %b = load volatile float, float addrspace(1)* %b.addr
  %c = load volatile float, float addrspace(1)* %c.addr

  %lo = call float @llvm.minnum.f32(float %b, float %a)
  %hi = call float @llvm.maxnum.f32(float %a, float %b)
  %hi.capped = call float @llvm.minnum.f32(float %c, float %hi)
  %median = call float @llvm.maxnum.f32(float %hi.capped, float %lo)
  store float %median, float addrspace(1)* %out.addr
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat15:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]

; Shape under test: maxnum(minnum(c, maxnum(b, a)), minnum(b, a)).
; The function uses attribute group #2, and the checks above expect the whole
; expression to be selected as a single v_med3_f32.
define void @v_test_global_nnans_med3_f32_pat15(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  ; Index every buffer with the workitem id.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.addr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %b.addr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
  %c.addr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
  %out.addr = getelementptr float, float addrspace(1)* %out, i32 %lane

  ; Volatile loads in a, b, c order; the load checks bind A, B, C in that order.
  %a = load volatile float, float addrspace(1)* %a.addr
  %b = load volatile float, float addrspace(1)* %b.addr
  %c = load volatile float, float addrspace(1)* %c.addr

  %lo = call float @llvm.minnum.f32(float %b, float %a)
  %hi = call float @llvm.maxnum.f32(float %b, float %a)
  %hi.capped = call float @llvm.minnum.f32(float %c, float %hi)
  %median = call float @llvm.maxnum.f32(float %hi.capped, float %lo)
  store float %median, float addrspace(1)* %out.addr
  ret void
}
|
|
|
|
; ---------------------------------------------------------------------
|
|
; Negative patterns
|
|
; ---------------------------------------------------------------------
|
|
|
|
; GCN-LABEL: {{^}}v_test_safe_med3_f32_pat0_multi_use0:
; GCN: v_min_f32
; GCN: v_max_f32
; GCN: v_min_f32
; GCN: v_max_f32

; Shape: maxnum(minnum(a, b), minnum(maxnum(a, b), c)), where the first minnum
; has an additional user (a volatile store). The checks above expect the four
; min/max operations to be emitted individually.
define void @v_test_safe_med3_f32_pat0_multi_use0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
  ; Index every buffer with the workitem id.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.addr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %b.addr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
  %c.addr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
  %out.addr = getelementptr float, float addrspace(1)* %out, i32 %lane

  %a = load volatile float, float addrspace(1)* %a.addr
  %b = load volatile float, float addrspace(1)* %b.addr
  %c = load volatile float, float addrspace(1)* %c.addr

  %lo = call float @llvm.minnum.f32(float %a, float %b)
  ; Second use of %lo.
  store volatile float %lo, float addrspace(1)* undef
  %hi = call float @llvm.maxnum.f32(float %a, float %b)
  %hi.capped = call float @llvm.minnum.f32(float %hi, float %c)
  %median = call float @llvm.maxnum.f32(float %lo, float %hi.capped)
  store float %median, float addrspace(1)* %out.addr
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}v_test_safe_med3_f32_pat0_multi_use1:
; GCN-NOT: v_med3_f32

; Shape: maxnum(minnum(a, b), minnum(maxnum(a, b), c)), where the inner maxnum
; has an additional user (a volatile store). This sits in the negative-pattern
; section, so the directive above makes the test fail if a v_med3_f32 is
; emitted; previously only the label was matched.
define void @v_test_safe_med3_f32_pat0_multi_use1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
  ; Index every buffer with the workitem id.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.addr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %b.addr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
  %c.addr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
  %out.addr = getelementptr float, float addrspace(1)* %out, i32 %lane

  %a = load volatile float, float addrspace(1)* %a.addr
  %b = load volatile float, float addrspace(1)* %b.addr
  %c = load volatile float, float addrspace(1)* %c.addr

  %lo = call float @llvm.minnum.f32(float %a, float %b)
  %hi = call float @llvm.maxnum.f32(float %a, float %b)
  ; Second use of %hi.
  store volatile float %hi, float addrspace(1)* undef
  %hi.capped = call float @llvm.minnum.f32(float %hi, float %c)
  %median = call float @llvm.maxnum.f32(float %lo, float %hi.capped)
  store float %median, float addrspace(1)* %out.addr
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}v_test_safe_med3_f32_pat0_multi_use2:
; GCN-NOT: v_med3_f32

; Shape: maxnum(minnum(a, b), minnum(maxnum(a, b), c)), where the second minnum
; has an additional user (a volatile store). This sits in the negative-pattern
; section, so the directive above makes the test fail if a v_med3_f32 is
; emitted; previously only the label was matched.
define void @v_test_safe_med3_f32_pat0_multi_use2(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
  ; Index every buffer with the workitem id.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.addr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %b.addr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
  %c.addr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
  %out.addr = getelementptr float, float addrspace(1)* %out, i32 %lane

  %a = load volatile float, float addrspace(1)* %a.addr
  %b = load volatile float, float addrspace(1)* %b.addr
  %c = load volatile float, float addrspace(1)* %c.addr

  %lo = call float @llvm.minnum.f32(float %a, float %b)
  %hi = call float @llvm.maxnum.f32(float %a, float %b)
  %hi.capped = call float @llvm.minnum.f32(float %hi, float %c)
  ; Second use of %hi.capped.
  store volatile float %hi.capped, float addrspace(1)* undef
  %median = call float @llvm.maxnum.f32(float %lo, float %hi.capped)
  store float %median, float addrspace(1)* %out.addr
  ret void
}
|
|
|
|
|
|
; GCN-LABEL: {{^}}v_test_safe_med3_f32_pat0:
; GCN-NOT: v_med3_f32

; Shape: maxnum(minnum(a, b), minnum(maxnum(a, b), c)) on plain loaded values,
; compiled with attribute group #1 ("no-nans-fp-math"="false"). This sits in
; the negative-pattern section, so the directive above makes the test fail if
; a v_med3_f32 is emitted; previously only the label was matched.
define void @v_test_safe_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
  ; Index every buffer with the workitem id.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.addr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %b.addr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
  %c.addr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
  %out.addr = getelementptr float, float addrspace(1)* %out, i32 %lane

  %a = load volatile float, float addrspace(1)* %a.addr
  %b = load volatile float, float addrspace(1)* %b.addr
  %c = load volatile float, float addrspace(1)* %c.addr

  %lo = call float @llvm.minnum.f32(float %a, float %b)
  %hi = call float @llvm.maxnum.f32(float %a, float %b)
  %hi.capped = call float @llvm.minnum.f32(float %hi, float %c)
  %median = call float @llvm.maxnum.f32(float %lo, float %hi.capped)
  store float %median, float addrspace(1)* %out.addr
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}v_nnan_inputs_missing0_med3_f32_pat0:
; GCN-NOT: v_med3_f32

; Shape: maxnum(minnum(x, y), minnum(maxnum(x, y), z)) where x, y, z are fadd
; results. Only the fadds producing y and z carry the nnan flag; the one
; producing x does not. This sits in the negative-pattern section, so the
; directive above makes the test fail if a v_med3_f32 is emitted; previously
; only the label was matched.
define void @v_nnan_inputs_missing0_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
  ; Index every buffer with the workitem id.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.addr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %b.addr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
  %c.addr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
  %out.addr = getelementptr float, float addrspace(1)* %out, i32 %lane

  %a = load volatile float, float addrspace(1)* %a.addr
  %b = load volatile float, float addrspace(1)* %b.addr
  %c = load volatile float, float addrspace(1)* %c.addr

  ; First operand deliberately lacks nnan.
  %a.add = fadd float %a, 1.0
  %b.add = fadd nnan float %b, 2.0
  %c.add = fadd nnan float %c, 4.0

  %lo = call float @llvm.minnum.f32(float %a.add, float %b.add)
  %hi = call float @llvm.maxnum.f32(float %a.add, float %b.add)
  %hi.capped = call float @llvm.minnum.f32(float %hi, float %c.add)
  %median = call float @llvm.maxnum.f32(float %lo, float %hi.capped)
  store float %median, float addrspace(1)* %out.addr
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}v_nnan_inputs_missing1_med3_f32_pat0:
; GCN-NOT: v_med3_f32

; Shape: maxnum(minnum(x, y), minnum(maxnum(x, y), z)) where x, y, z are fadd
; results. Only the fadds producing x and z carry the nnan flag; the one
; producing y does not. This sits in the negative-pattern section, so the
; directive above makes the test fail if a v_med3_f32 is emitted; previously
; only the label was matched.
define void @v_nnan_inputs_missing1_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
  ; Index every buffer with the workitem id.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.addr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %b.addr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
  %c.addr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
  %out.addr = getelementptr float, float addrspace(1)* %out, i32 %lane

  %a = load volatile float, float addrspace(1)* %a.addr
  %b = load volatile float, float addrspace(1)* %b.addr
  %c = load volatile float, float addrspace(1)* %c.addr

  ; Second operand deliberately lacks nnan.
  %a.add = fadd nnan float %a, 1.0
  %b.add = fadd float %b, 2.0
  %c.add = fadd nnan float %c, 4.0

  %lo = call float @llvm.minnum.f32(float %a.add, float %b.add)
  %hi = call float @llvm.maxnum.f32(float %a.add, float %b.add)
  %hi.capped = call float @llvm.minnum.f32(float %hi, float %c.add)
  %median = call float @llvm.maxnum.f32(float %lo, float %hi.capped)
  store float %median, float addrspace(1)* %out.addr
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}v_nnan_inputs_missing2_med3_f32_pat0:
; GCN-NOT: v_med3_f32

; Shape: maxnum(minnum(x, y), minnum(maxnum(x, y), z)) where x, y, z are fadd
; results. Only the fadds producing x and y carry the nnan flag; the one
; producing z does not. This sits in the negative-pattern section, so the
; directive above makes the test fail if a v_med3_f32 is emitted; previously
; only the label was matched.
define void @v_nnan_inputs_missing2_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
  ; Index every buffer with the workitem id.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.addr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %b.addr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
  %c.addr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
  %out.addr = getelementptr float, float addrspace(1)* %out, i32 %lane

  %a = load volatile float, float addrspace(1)* %a.addr
  %b = load volatile float, float addrspace(1)* %b.addr
  %c = load volatile float, float addrspace(1)* %c.addr

  ; Third operand deliberately lacks nnan.
  %a.add = fadd nnan float %a, 1.0
  %b.add = fadd nnan float %b, 2.0
  %c.add = fadd float %c, 4.0

  %lo = call float @llvm.minnum.f32(float %a.add, float %b.add)
  %hi = call float @llvm.maxnum.f32(float %a.add, float %b.add)
  %hi.capped = call float @llvm.minnum.f32(float %hi, float %c.add)
  %median = call float @llvm.maxnum.f32(float %lo, float %hi.capped)
  store float %median, float addrspace(1)* %out.addr
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_min_f32
; GCN: v_max_f32
; GCN: v_min_f32
; GCN: v_max_f32

; Shape: maxnum(minnum(-a, b), minnum(maxnum(a, b), c)). The inner minnum sees
; the negated a while the inner maxnum sees a itself, so the two do not share
; the same first operand. The checks above expect four separate min/max
; instructions.
define void @v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  ; Index every buffer with the workitem id.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.addr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %b.addr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
  %c.addr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
  %out.addr = getelementptr float, float addrspace(1)* %out, i32 %lane

  ; Volatile loads in a, b, c order; the load checks bind A, B, C in that order.
  %a = load volatile float, float addrspace(1)* %a.addr
  %b = load volatile float, float addrspace(1)* %b.addr
  %c = load volatile float, float addrspace(1)* %c.addr

  ; Negation written as a subtraction from -0.0.
  %a.neg = fsub float -0.0, %a
  %lo = call float @llvm.minnum.f32(float %a.neg, float %b)
  %hi = call float @llvm.maxnum.f32(float %a, float %b)
  %hi.capped = call float @llvm.minnum.f32(float %hi, float %c)
  %median = call float @llvm.maxnum.f32(float %lo, float %hi.capped)
  store float %median, float addrspace(1)* %out.addr
  ret void
}
|
|
|
|
; A simple min and max is not sufficient: only minnum(maxnum(a, b), c) is
; present here, and the checks expect a v_max_f32 followed by a v_min_f32.
; GCN-LABEL: {{^}}v_test_global_nnans_min_max_f32:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], [[B]], [[A]]
; GCN: v_min_f32_e32 v{{[0-9]+}}, [[C]], [[MAX]]
define void @v_test_global_nnans_min_max_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  ; Index every buffer with the workitem id.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.addr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
  %b.addr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
  %c.addr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
  %out.addr = getelementptr float, float addrspace(1)* %out, i32 %lane

  ; Volatile loads in a, b, c order; the load checks bind A, B, C in that order.
  %a = load volatile float, float addrspace(1)* %a.addr
  %b = load volatile float, float addrspace(1)* %b.addr
  %c = load volatile float, float addrspace(1)* %c.addr

  %hi = call float @llvm.maxnum.f32(float %a, float %b)
  %hi.capped = call float @llvm.minnum.f32(float %hi, float %c)
  store float %hi.capped, float addrspace(1)* %out.addr
  ret void
}
|
|
|
|
; Intrinsic declarations; every one uses attribute group #0.
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare float @llvm.fabs.f32(float) #0

; minnum / maxnum, single and double precision.
declare float @llvm.minnum.f32(float, float) #0
declare double @llvm.minnum.f64(double, double) #0
declare float @llvm.maxnum.f32(float, float) #0
declare double @llvm.maxnum.f64(double, double) #0

; #0 - attached to the intrinsic declarations above.
attributes #0 = { nounwind readnone }
; #1 - test functions compiled with "no-nans-fp-math" set to false.
attributes #1 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" }
; #2 - test functions compiled with "no-nans-fp-math" set to true.
attributes #2 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" }
|