1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00
llvm-mirror/test/CodeGen/AMDGPU/fneg-combines.ll
Matt Arsenault 6c04830c65 AMDGPU: Fix infinite loop in DAG combine with fneg + fma
We were not reporting isFNegFree for v2f32, although it is effectively
free after legalization. The generic combine was pulling fneg out of
the fma source operands, and the AMDGPU combine was doing the
opposite.
2021-06-18 19:09:03 -04:00

2628 lines
124 KiB
LLVM

; RUN: llc -march=amdgcn -mcpu=hawaii -start-after=sink -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-SAFE,SI %s
; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=hawaii -mattr=+flat-for-global -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-NSZ,SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -start-after=sink --verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-SAFE,VI %s
; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=fiji -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-NSZ,VI %s
; --------------------------------------------------------------------------------
; fadd tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_add_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -[[A]], [[B]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%add = fadd float %a, %b
%fneg = fneg float %add
store float %fneg, float addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_add_store_use_add_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @v_fneg_add_store_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%add = fadd float %a, %b
%fneg = fneg float %add
store volatile float %fneg, float addrspace(1)* %out
store volatile float %add, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}v_fneg_add_multi_use_add_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-SAFE: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[ADD]]
; GCN-NSZ: v_sub_f32_e64 [[NEG_ADD:v[0-9]+]], -[[A]], [[B]]
; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_ADD]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @v_fneg_add_multi_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%add = fadd float %a, %b
%fneg = fneg float %add
%use1 = fmul float %add, 4.0
store volatile float %fneg, float addrspace(1)* %out
store volatile float %use1, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}v_fneg_add_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-SAFE: v_sub_f32_e32
; GCN-SAFE: v_xor_b32_e32 [[ADD:v[0-9]+]], 0x80000000,
; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_add_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%fneg.a = fneg float %a
%add = fadd float %fneg.a, %b
%fneg = fneg float %add
store volatile float %fneg, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}v_fneg_add_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_add_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%fneg.b = fneg float %b
%add = fadd float %a, %fneg.b
%fneg = fneg float %add
store volatile float %fneg, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}v_fneg_add_fneg_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-SAFE: v_sub_f32_e64 [[ADD:v[0-9]+]], -[[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
; GCN-NSZ: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_add_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%fneg.a = fneg float %a
%fneg.b = fneg float %b
%add = fadd float %fneg.a, %fneg.b
%fneg = fneg float %add
store volatile float %fneg, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}v_fneg_add_store_use_fneg_x_f32:
; GCN-SAFE-DAG: s_brev_b32 [[SIGNBIT:s[0-9]+]], 1{{$}}
; GCN-DAG: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-SAFE: v_xor_b32_e32 [[NEG_A:v[0-9]+]], [[SIGNBIT]], [[A]]
; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], [[SIGNBIT]], [[ADD]]
; GCN-NSZ-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
; GCN-NSZ-NEXT: s_waitcnt vmcnt(0)
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
; GCN-NSZ-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @v_fneg_add_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%fneg.a = fneg float %a
%add = fadd float %fneg.a, %b
%fneg = fneg float %add
store volatile float %fneg, float addrspace(1)* %out
store volatile float %fneg.a, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}v_fneg_add_multi_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-SAFE-DAG: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-SAFE-DAG: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NSZ-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
; GCN-NSZ-NEXT: s_waitcnt vmcnt(0)
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; GCN-NSZ-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @v_fneg_add_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%fneg.a = fneg float %a
%add = fadd float %fneg.a, %b
%fneg = fneg float %add
%use1 = fmul float %fneg.a, %c
store volatile float %fneg, float addrspace(1)* %out
store volatile float %use1, float addrspace(1)* %out
ret void
}
; This one asserted with -enable-no-signed-zeros-fp-math
; GCN-LABEL: {{^}}fneg_fadd_0:
; GCN-SAFE-DAG: v_mad_f32 [[A:v[0-9]+]],
; GCN-SAFE-DAG: v_cmp_ngt_f32_e32 {{.*}}, [[A]]
; GCN-SAFE-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, -[[A]]
define amdgpu_ps float @fneg_fadd_0(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr #0 {
.entry:
%tmp7 = fdiv float 1.000000e+00, %tmp6
%tmp8 = fmul float 0.000000e+00, %tmp7
%tmp9 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp8
%.i188 = fadd float %tmp9, 0.000000e+00
%tmp10 = fcmp uge float %.i188, %tmp2
%tmp11 = fneg float %.i188
%.i092 = select i1 %tmp10, float %tmp2, float %tmp11
%tmp12 = fcmp ule float %.i092, 0.000000e+00
%.i198 = select i1 %tmp12, float 0.000000e+00, float 0x7FF8000000000000
ret float %.i198
}
; This is a workaround because -enable-no-signed-zeros-fp-math does not set up
; function attribute unsafe-fp-math automatically. Combine with the previous test
; when that is done.
; GCN-LABEL: {{^}}fneg_fadd_0_nsz:
; GCN-NSZ-DAG: v_rcp_f32_e32 [[A:v[0-9]+]],
; GCN-NSZ-DAG: v_mov_b32_e32 [[B:v[0-9]+]],
; GCN-NSZ-DAG: v_mov_b32_e32 [[C:v[0-9]+]],
; GCN-NSZ-DAG: v_mul_f32_e32 [[D:v[0-9]+]],
; GCN-NSZ-DAG: v_cmp_nlt_f32_e64 {{.*}}, -[[D]]
define amdgpu_ps float @fneg_fadd_0_nsz(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr #2 {
.entry:
%tmp7 = fdiv afn float 1.000000e+00, %tmp6
%tmp8 = fmul float 0.000000e+00, %tmp7
%tmp9 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp8
%.i188 = fadd float %tmp9, 0.000000e+00
%tmp10 = fcmp uge float %.i188, %tmp2
%tmp11 = fneg float %.i188
%.i092 = select i1 %tmp10, float %tmp2, float %tmp11
%tmp12 = fcmp ule float %.i092, 0.000000e+00
%.i198 = select i1 %tmp12, float 0.000000e+00, float 0x7FF8000000000000
ret float %.i198
}
; --------------------------------------------------------------------------------
; fmul tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_mul_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%mul = fmul float %a, %b
%fneg = fneg float %mul
store float %fneg, float addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_mul_store_use_mul_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_xor_b32_e32 [[NEG_MUL:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_mul_store_use_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%mul = fmul float %a, %b
%fneg = fneg float %mul
store volatile float %fneg, float addrspace(1)* %out
store volatile float %mul, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}v_fneg_mul_multi_use_mul_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[MUL0:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MUL0]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @v_fneg_mul_multi_use_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%mul = fmul float %a, %b
%fneg = fneg float %mul
%use1 = fmul float %mul, 4.0
store volatile float %fneg, float addrspace(1)* %out
store volatile float %use1, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}v_fneg_mul_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_mul_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%fneg.a = fneg float %a
%mul = fmul float %fneg.a, %b
%fneg = fneg float %mul
store volatile float %fneg, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}v_fneg_mul_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_mul_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%fneg.b = fneg float %b
%mul = fmul float %a, %fneg.b
%fneg = fneg float %mul
store volatile float %fneg, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}v_fneg_mul_fneg_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_mul_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%fneg.a = fneg float %a
%fneg.b = fneg float %b
%mul = fmul float %fneg.a, %fneg.b
%fneg = fneg float %mul
store volatile float %fneg, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}v_fneg_mul_store_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[A]], [[B]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
define amdgpu_kernel void @v_fneg_mul_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%fneg.a = fneg float %a
%mul = fmul float %fneg.a, %b
%fneg = fneg float %mul
store volatile float %fneg, float addrspace(1)* %out
store volatile float %fneg.a, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}v_fneg_mul_multi_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_mul_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%fneg.a = fneg float %a
%mul = fmul float %fneg.a, %b
%fneg = fneg float %mul
%use1 = fmul float %fneg.a, %c
store volatile float %fneg, float addrspace(1)* %out
store volatile float %use1, float addrspace(1)* %out
ret void
}
; --------------------------------------------------------------------------------
; fminnum tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_minnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%min = call float @llvm.minnum.f32(float %a, float %b)
%fneg = fneg float %min
store float %fneg, float addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_minnum_f32_no_ieee:
; GCN-NOT: v0
; GCN-NOT: v1
; GCN: v_max_f32_e64 v0, -v0, -v1
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_minnum_f32_no_ieee(float %a, float %b) #0 {
%min = call float @llvm.minnum.f32(float %a, float %b)
%fneg = fneg float %min
ret float %fneg
}
; GCN-LABEL: {{^}}v_fneg_self_minnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_self_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%min = call float @llvm.minnum.f32(float %a, float %a)
%min.fneg = fneg float %min
store float %min.fneg, float addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_self_minnum_f32_no_ieee:
; GCN-NOT: v0
; GCN: v_max_f32_e64 v0, -v0, -v0
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_self_minnum_f32_no_ieee(float %a) #0 {
%min = call float @llvm.minnum.f32(float %a, float %a)
%min.fneg = fneg float %min
ret float %min.fneg
}
; GCN-LABEL: {{^}}v_fneg_posk_minnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], -4.0, [[QUIET_NEG_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_posk_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%min = call float @llvm.minnum.f32(float 4.0, float %a)
%fneg = fneg float %min
store float %fneg, float addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_posk_minnum_f32_no_ieee:
; GCN-NOT: v0
; GCN: v_max_f32_e64 v0, -v0, -4.0
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_posk_minnum_f32_no_ieee(float %a) #0 {
%min = call float @llvm.minnum.f32(float 4.0, float %a)
%fneg = fneg float %min
ret float %fneg
}
; GCN-LABEL: {{^}}v_fneg_negk_minnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 4.0, [[QUIET_NEG_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_negk_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%min = call float @llvm.minnum.f32(float -4.0, float %a)
%fneg = fneg float %min
store float %fneg, float addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_negk_minnum_f32_no_ieee:
; GCN-NOT: v0
; GCN: v_max_f32_e64 v0, -v0, 4.0
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_negk_minnum_f32_no_ieee(float %a) #0 {
%min = call float @llvm.minnum.f32(float -4.0, float %a)
%fneg = fneg float %min
ret float %fneg
}
; GCN-LABEL: {{^}}v_fneg_0_minnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_0_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%min = call float @llvm.minnum.f32(float 0.0, float %a)
%fneg = fneg float %min
store float %fneg, float addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_neg0_minnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET_NEG_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_neg0_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%min = call float @llvm.minnum.f32(float -0.0, float %a)
%fneg = fneg float %min
store float %fneg, float addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f32:
; GCN-DAG: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; SI-DAG: v_mul_f32_e32 [[QUIET_NEG:v[0-9]+]], -1.0, [[A]]
; SI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0xbe22f983, [[QUIET_NEG]]
; VI: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[A]]
; VI: v_min_f32_e32 [[MAX:v[0-9]+]], 0.15915494, [[QUIET]]
; VI: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[MAX]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_inv2pi_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a)
%fneg = fneg float %min
store float %fneg, float addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f32:
; GCN-DAG: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; SI: v_mul_f32_e32 [[NEG_QUIET:v[0-9]+]], -1.0, [[A]]
; SI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0x3e22f983, [[NEG_QUIET]]
; VI: v_mul_f32_e32 [[NEG_QUIET:v[0-9]+]], -1.0, [[A]]
; VI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0.15915494, [[NEG_QUIET]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%min = call float @llvm.minnum.f32(float 0xBFC45F3060000000, float %a)
%fneg = fneg float %min
store float %fneg, float addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f16:
; GCN-DAG: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]]
; SI: v_max_f32_e32 [[MAX:v[0-9]+]], 0xbe230000, [[CVT]]
; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[MAX]]
; VI: v_max_f16_e32 [[QUIET:v[0-9]+]], [[A]], [[A]]
; VI: v_min_f16_e32 [[MAX:v[0-9]+]], 0.15915494, [[QUIET]]
; VI: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x8000, [[MAX]]
; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_inv2pi_minnum_f16(half addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
%a = load volatile half, half addrspace(1)* %a.gep
%min = call half @llvm.minnum.f16(half 0xH3118, half %a)
%fneg = fsub half -0.000000e+00, %min
store half %fneg, half addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f16:
; GCN-DAG: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]]
; SI: v_max_f32_e32 [[MAX:v[0-9]+]], 0x3e230000, [[CVT]]
; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[MAX]]
; VI: v_max_f16_e64 [[NEG_QUIET:v[0-9]+]], -[[A]], -[[A]]
; VI: v_max_f16_e32 [[RESULT:v[0-9]+]], 0.15915494, [[NEG_QUIET]]
; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f16(half addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
%a = load volatile half, half addrspace(1)* %a.gep
%min = call half @llvm.minnum.f16(half 0xHB118, half %a)
%fneg = fsub half -0.000000e+00, %min
store half %fneg, half addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f64:
; GCN-DAG: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0xbfc45f30
; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0x6dc9c882
; SI-DAG: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
; SI: v_max_f64 v{{\[}}[[RESULT_LO:[0-9]+]]:[[RESULT_HI:[0-9]+]]{{\]}}, [[NEG_QUIET]], s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
; VI: v_min_f64 v{{\[}}[[RESULT_LO:[0-9]+]]:[[RESULT_HI:[0-9]+]]{{\]}}, [[A]], 0.15915494
; VI: v_xor_b32_e32 v[[RESULT_HI]], 0x80000000, v[[RESULT_HI]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
define amdgpu_kernel void @v_fneg_inv2pi_minnum_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
%a = load volatile double, double addrspace(1)* %a.gep
%min = call double @llvm.minnum.f64(double 0x3fc45f306dc9c882, double %a)
%fneg = fsub double -0.000000e+00, %min
store double %fneg, double addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f64:
; GCN-DAG: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0x3fc45f30
; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0x6dc9c882
; SI-DAG: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
; SI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[NEG_QUIET]], s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
; VI: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
; VI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[NEG_QUIET]], 0.15915494
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
%a = load volatile double, double addrspace(1)* %a.gep
%min = call double @llvm.minnum.f64(double 0xbfc45f306dc9c882, double %a)
%fneg = fsub double -0.000000e+00, %min
store double %fneg, double addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_neg0_minnum_f32_no_ieee:
; GCN-NOT: v0
; GCN: v_max_f32_e64 v0, -v0, 0{{$}}
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_neg0_minnum_f32_no_ieee(float %a) #0 {
%min = call float @llvm.minnum.f32(float -0.0, float %a)
%fneg = fneg float %min
ret float %fneg
}
; GCN-LABEL: {{^}}v_fneg_0_minnum_foldable_use_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[A]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 0, [[QUIET_A]]
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_0_minnum_foldable_use_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%min = call float @llvm.minnum.f32(float 0.0, float %a)
%fneg = fneg float %min
%mul = fmul float %fneg, %b
store float %mul, float addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_foldable_use_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; SI: v_mul_f32_e32 [[QUIET_NEG:v[0-9]+]], -1.0, [[A]]
; SI: v_max_f32_e32 [[MIN:v[0-9]+]], 0xbe22f983, [[QUIET_NEG]]
; SI: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[MIN]], [[B]]
; VI: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[A]]
; VI: v_min_f32_e32 [[MIN:v[0-9]+]], 0.15915494, [[QUIET]]
; VI: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_inv2pi_minnum_foldable_use_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a)
%fneg = fneg float %min
%mul = fmul float %fneg, %b
store float %mul, float addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_0_minnum_foldable_use_f32_no_ieee:
; GCN-NOT: v0
; GCN-NOT: v1
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 0, v0
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], v1
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_0_minnum_foldable_use_f32_no_ieee(float %a, float %b) #0 {
%min = call float @llvm.minnum.f32(float 0.0, float %a)
%fneg = fneg float %min
%mul = fmul float %fneg, %b
ret float %mul
}
; GCN-LABEL: {{^}}v_fneg_minnum_multi_use_minnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
; GCN: v_max_f32_e32 [[MAX0:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MAX0]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @v_fneg_minnum_multi_use_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%min = call float @llvm.minnum.f32(float %a, float %b)
%fneg = fneg float %min
%use1 = fmul float %min, 4.0
store volatile float %fneg, float addrspace(1)* %out
store volatile float %use1, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}v_fneg_minnum_multi_use_minnum_f32_no_ieee:
; GCN-NOT: v0
; GCN-NOT: v1
; GCN: v_max_f32_e64 v0, -v0, -v1
; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
; GCN-NEXT: ; return
define amdgpu_ps <2 x float> @v_fneg_minnum_multi_use_minnum_f32_no_ieee(float %a, float %b) #0 {
%min = call float @llvm.minnum.f32(float %a, float %b)
%fneg = fneg float %min
%use1 = fmul float %min, 4.0
%ins0 = insertelement <2 x float> undef, float %fneg, i32 0
%ins1 = insertelement <2 x float> %ins0, float %use1, i32 1
ret <2 x float> %ins1
}
; --------------------------------------------------------------------------------
; fmaxnum tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_maxnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%max = call float @llvm.maxnum.f32(float %a, float %b)
%fneg = fneg float %max
store float %fneg, float addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_maxnum_f32_no_ieee:
; GCN-NOT: v0
; GCN-NOT: v1
; GCN: v_min_f32_e64 v0, -v0, -v1
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_maxnum_f32_no_ieee(float %a, float %b) #0 {
%max = call float @llvm.maxnum.f32(float %a, float %b)
%fneg = fneg float %max
ret float %fneg
}
; GCN-LABEL: {{^}}v_fneg_self_maxnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_self_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%max = call float @llvm.maxnum.f32(float %a, float %a)
%max.fneg = fneg float %max
store float %max.fneg, float addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_self_maxnum_f32_no_ieee:
; GCN-NOT: v0
; GCN: v_min_f32_e64 v0, -v0, -v0
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_self_maxnum_f32_no_ieee(float %a) #0 {
%max = call float @llvm.maxnum.f32(float %a, float %a)
%max.fneg = fneg float %max
ret float %max.fneg
}
; GCN-LABEL: {{^}}v_fneg_posk_maxnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], -4.0, [[QUIET_NEG_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_posk_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%max = call float @llvm.maxnum.f32(float 4.0, float %a)
%fneg = fneg float %max
store float %fneg, float addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_posk_maxnum_f32_no_ieee:
; GCN-NOT: v0
; GCN: v_min_f32_e64 v0, -v0, -4.0
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_posk_maxnum_f32_no_ieee(float %a) #0 {
%max = call float @llvm.maxnum.f32(float 4.0, float %a)
%fneg = fneg float %max
ret float %fneg
}
; GCN-LABEL: {{^}}v_fneg_negk_maxnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 4.0, [[QUIET_NEG_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_negk_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%max = call float @llvm.maxnum.f32(float -4.0, float %a)
%fneg = fneg float %max
store float %fneg, float addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_negk_maxnum_f32_no_ieee:
; GCN-NOT: v0
; GCN: v_min_f32_e64 v0, -v0, 4.0
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_negk_maxnum_f32_no_ieee(float %a) #0 {
%max = call float @llvm.maxnum.f32(float -4.0, float %a)
%fneg = fneg float %max
ret float %fneg
}
; GCN-LABEL: {{^}}v_fneg_0_maxnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_0_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%max = call float @llvm.maxnum.f32(float 0.0, float %a)
%fneg = fneg float %max
store float %fneg, float addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_neg0_maxnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET_NEG_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_neg0_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%max = call float @llvm.maxnum.f32(float -0.0, float %a)
%fneg = fneg float %max
store float %fneg, float addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_neg0_maxnum_f32_no_ieee:
; GCN-NOT: v0
; GCN: v_min_f32_e64 v0, -v0, 0{{$}}
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_neg0_maxnum_f32_no_ieee(float %a) #0 {
%max = call float @llvm.maxnum.f32(float -0.0, float %a)
%fneg = fneg float %max
ret float %fneg
}
; GCN-LABEL: {{^}}v_fneg_0_maxnum_foldable_use_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[A]]
; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[QUIET_A]]
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MAX]], [[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_0_maxnum_foldable_use_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%max = call float @llvm.maxnum.f32(float 0.0, float %a)
%fneg = fneg float %max
%mul = fmul float %fneg, %b
store float %mul, float addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_0_maxnum_foldable_use_f32_no_ieee:
; GCN-NOT: v0
; GCN-NOT: v1
; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, v0
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MAX]], v1
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_0_maxnum_foldable_use_f32_no_ieee(float %a, float %b) #0 {
%max = call float @llvm.maxnum.f32(float 0.0, float %a)
%fneg = fneg float %max
%mul = fmul float %fneg, %b
ret float %mul
}
; GCN-LABEL: {{^}}v_fneg_maxnum_multi_use_maxnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
; GCN: v_min_f32_e32 [[MAX0:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MAX0]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @v_fneg_maxnum_multi_use_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%max = call float @llvm.maxnum.f32(float %a, float %b)
%fneg = fneg float %max
%use1 = fmul float %max, 4.0
store volatile float %fneg, float addrspace(1)* %out
store volatile float %use1, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}v_fneg_maxnum_multi_use_maxnum_f32_no_ieee:
; GCN-NOT: v0
; GCN-NOT: v1
; GCN: v_min_f32_e64 v0, -v0, -v1
; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
; GCN-NEXT: ; return
define amdgpu_ps <2 x float> @v_fneg_maxnum_multi_use_maxnum_f32_no_ieee(float %a, float %b) #0 {
%max = call float @llvm.maxnum.f32(float %a, float %b)
%fneg = fneg float %max
%use1 = fmul float %max, 4.0
%ins0 = insertelement <2 x float> undef, float %fneg, i32 0
%ins1 = insertelement <2 x float> %ins0, float %use1, i32 1
ret <2 x float> %ins1
}
; --------------------------------------------------------------------------------
; fma tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_fma_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN-SAFE: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[RESULT]]
; GCN-NSZ: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%fma = call float @llvm.fma.f32(float %a, float %b, float %c)
%fneg = fneg float %fma
store float %fneg, float addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_fma_store_use_fma_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
; GCN-DAG: v_xor_b32_e32 [[NEG_FMA:v[0-9]+]], 0x80000000, [[FMA]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @v_fneg_fma_store_use_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%fma = call float @llvm.fma.f32(float %a, float %b, float %c)
%fneg = fneg float %fma
store volatile float %fneg, float addrspace(1)* %out
store volatile float %fma, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}v_fneg_fma_multi_use_fma_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
; GCN-SAFE: v_xor_b32_e32 [[NEG_FMA:v[0-9]+]], 0x80000000, [[FMA]]
; GCN-SAFE: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[FMA]]
; GCN-NSZ: v_fma_f32 [[NEG_FMA:v[0-9]+]], [[A]], -[[B]], -[[C]]
; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_FMA]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @v_fneg_fma_multi_use_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%fma = call float @llvm.fma.f32(float %a, float %b, float %c)
%fneg = fneg float %fma
%use1 = fmul float %fma, 4.0
store volatile float %fneg, float addrspace(1)* %out
store volatile float %use1, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}v_fneg_fma_fneg_x_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], [[B]], [[C]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
; GCN-NSZ-NOT: [[FMA]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
define amdgpu_kernel void @v_fneg_fma_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%fneg.a = fneg float %a
%fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
%fneg = fneg float %fma
store volatile float %fneg, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}v_fneg_fma_x_fneg_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
; GCN-NSZ-NOT: [[FMA]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
define amdgpu_kernel void @v_fneg_fma_x_fneg_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%fneg.b = fneg float %b
%fma = call float @llvm.fma.f32(float %a, float %fneg.b, float %c)
%fneg = fneg float %fma
store volatile float %fneg, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}v_fneg_fma_fneg_fneg_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], -[[C]]
; GCN-NSZ-NOT: [[FMA]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
define amdgpu_kernel void @v_fneg_fma_fneg_fneg_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%fneg.a = fneg float %a
%fneg.b = fneg float %b
%fma = call float @llvm.fma.f32(float %fneg.a, float %fneg.b, float %c)
%fneg = fneg float %fma
store volatile float %fneg, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}v_fneg_fma_fneg_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], [[B]], -[[C]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
; GCN-NSZ-NOT: [[FMA]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
define amdgpu_kernel void @v_fneg_fma_fneg_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%fneg.a = fneg float %a
%fneg.c = fneg float %c
%fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %fneg.c)
%fneg = fneg float %fma
store volatile float %fneg, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}v_fneg_fma_x_y_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN-NSZ-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
; GCN-NSZ-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]]
; GCN-NSZ-NOT: [[FMA]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
define amdgpu_kernel void @v_fneg_fma_x_y_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%fneg.c = fneg float %c
%fma = call float @llvm.fma.f32(float %a, float %b, float %fneg.c)
%fneg = fneg float %fma
store volatile float %fneg, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}v_fneg_fma_store_use_fneg_x_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN-SAFE: v_xor_b32
; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]],
; GCN-SAFE: v_xor_b32
; GCN-NSZ-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-NSZ-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
; GCN-NSZ-NOT: [[FMA]]
; GCN-NSZ-NOT: [[NEG_A]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
; GCN-NSZ-NOT: [[NEG_A]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
define amdgpu_kernel void @v_fneg_fma_store_use_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%fneg.a = fneg float %a
%fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
%fneg = fneg float %fma
store volatile float %fneg, float addrspace(1)* %out
store volatile float %fneg.a, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}v_fneg_fma_multi_use_fneg_x_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-SAFE-DAG: v_fma_f32 [[FMA:v[0-9]+]]
; GCN-SAFE-DAG: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]
; GCN-NSZ-DAG: v_fma_f32 [[NEG_FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]]
; GCN-NSZ-NEXT: s_waitcnt vmcnt(0)
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; GCN-NSZ-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @v_fneg_fma_multi_use_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float %d) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%fneg.a = fneg float %a
%fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
%fneg = fneg float %fma
%use1 = fmul float %fneg.a, %d
store volatile float %fneg, float addrspace(1)* %out
store volatile float %use1, float addrspace(1)* %out
ret void
}
; --------------------------------------------------------------------------------
; fmad tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_fmad_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN-SAFE: v_mac_f32_e32 [[C]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[C]]
; GCN-NSZ: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_fmad_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
%fneg = fneg float %fma
store float %fneg, float addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_fmad_v4f32:
; GCN-NSZ: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
; GCN-NSZ: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
; GCN-NSZ: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
; GCN-NSZ: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
define amdgpu_kernel void @v_fneg_fmad_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %a.ptr, <4 x float> addrspace(1)* %b.ptr, <4 x float> addrspace(1)* %c.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %b.ptr, i64 %tid.ext
%c.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %c.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %out, i64 %tid.ext
%a = load volatile <4 x float>, <4 x float> addrspace(1)* %a.gep
%b = load volatile <4 x float>, <4 x float> addrspace(1)* %b.gep
%c = load volatile <4 x float>, <4 x float> addrspace(1)* %c.gep
%fma = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
%fneg = fneg <4 x float> %fma
store <4 x float> %fneg, <4 x float> addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_fmad_multi_use_fmad_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN-SAFE: v_mac_f32_e32 [[C]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 [[NEG_MAD:v[0-9]+]], 0x80000000, [[C]]
; GCN-SAFE-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[C]]
; GCN-NSZ: v_mad_f32 [[NEG_MAD:v[0-9]+]], [[A]], -[[B]], -[[C]]
; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_MAD]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MAD]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @v_fneg_fmad_multi_use_fmad_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
%fneg = fneg float %fma
%use1 = fmul float %fma, 4.0
store volatile float %fneg, float addrspace(1)* %out
store volatile float %use1, float addrspace(1)* %out
ret void
}
; --------------------------------------------------------------------------------
; fp_extend tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_fp_extend_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_f64_f32_e64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_fp_extend_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%fpext = fpext float %a to double
%fneg = fsub double -0.000000e+00, %fpext
store double %fneg, double addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_fp_extend_fneg_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]]
; GCN: {{buffer|flat}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%fneg.a = fneg float %a
%fpext = fpext float %fneg.a to double
%fneg = fsub double -0.000000e+00, %fpext
store double %fneg, double addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_fp_extend_store_use_fneg_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[FNEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FNEG_A]]
define amdgpu_kernel void @v_fneg_fp_extend_store_use_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%fneg.a = fneg float %a
%fpext = fpext float %fneg.a to double
%fneg = fsub double -0.000000e+00, %fpext
store volatile double %fneg, double addrspace(1)* %out.gep
store volatile float %fneg.a, float addrspace(1)* undef
ret void
}
; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]{{\]}}, [[A]]
; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]{{\]}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[CVT_LO]]:[[CVT_HI]]{{\]}}
define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%fpext = fpext float %a to double
%fneg = fsub double -0.000000e+00, %fpext
store volatile double %fneg, double addrspace(1)* %out.gep
store volatile double %fpext, double addrspace(1)* undef
ret void
}
; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]{{\]}}, [[A]]
; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
; GCN-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[CVT_LO]]:[[CVT_HI]]{{\]}}, 4.0
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]{{\]}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%fpext = fpext float %a to double
%fneg = fsub double -0.000000e+00, %fpext
%mul = fmul double %fpext, 4.0
store volatile double %fneg, double addrspace(1)* %out.gep
store volatile double %mul, double addrspace(1)* %out.gep
ret void
}
; FIXME: Source modifiers not folded for f16->f32
; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f16_to_f32:
define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile half, half addrspace(1)* %a.gep
%fpext = fpext half %a to float
%fneg = fneg float %fpext
store volatile float %fneg, float addrspace(1)* %out.gep
store volatile float %fpext, float addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32:
define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile half, half addrspace(1)* %a.gep
%fpext = fpext half %a to float
%fneg = fneg float %fpext
%mul = fmul float %fpext, 4.0
store volatile float %fneg, float addrspace(1)* %out.gep
store volatile float %mul, float addrspace(1)* %out.gep
ret void
}
; --------------------------------------------------------------------------------
; fp_round tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_fp_round_f64_to_f32:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN: v_cvt_f32_f64_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_fp_round_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile double, double addrspace(1)* %a.gep
%fpround = fptrunc double %a to float
%fneg = fneg float %fpround
store float %fneg, float addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_fp_round_fneg_f64_to_f32:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile double, double addrspace(1)* %a.gep
%fneg.a = fsub double -0.000000e+00, %a
%fpround = fptrunc double %fneg.a to float
%fneg = fneg float %fpround
store float %fneg, float addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f64_to_f32:
; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}
; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], v{{\[}}[[A_LO]]:[[A_HI]]{{\]}}
; GCN-DAG: v_xor_b32_e32 v[[NEG_A_HI:[0-9]+]], 0x80000000, v[[A_HI]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[A_LO]]:[[NEG_A_HI]]{{\]}}
define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile double, double addrspace(1)* %a.gep
%fneg.a = fsub double -0.000000e+00, %a
%fpround = fptrunc double %fneg.a to float
%fneg = fneg float %fpround
store volatile float %fneg, float addrspace(1)* %out.gep
store volatile double %fneg.a, double addrspace(1)* undef
ret void
}
; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f64_to_f32:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN-DAG: v_mul_f64 [[USE1:v\[[0-9]+:[0-9]+\]]], -[[A]], s{{\[}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[USE1]]
define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr, double %c) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile double, double addrspace(1)* %a.gep
%fneg.a = fsub double -0.000000e+00, %a
%fpround = fptrunc double %fneg.a to float
%fneg = fneg float %fpround
%use1 = fmul double %fneg.a, %c
store volatile float %fneg, float addrspace(1)* %out.gep
store volatile double %use1, double addrspace(1)* undef
ret void
}
; GCN-LABEL: {{^}}v_fneg_fp_round_f32_to_f16:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_f16_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_fp_round_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%fpround = fptrunc float %a to half
%fneg = fsub half -0.000000e+00, %fpround
store half %fneg, half addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_fp_round_fneg_f32_to_f16:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_fp_round_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%fneg.a = fneg float %a
%fpround = fptrunc float %fneg.a to half
%fneg = fsub half -0.000000e+00, %fpround
store half %fneg, half addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_multi_use_fp_round_fneg_f64_to_f32:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN-DAG: v_cvt_f32_f64_e32 [[CVT:v[0-9]+]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[CVT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[CVT]]
define amdgpu_kernel void @v_fneg_multi_use_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile double, double addrspace(1)* %a.gep
%fpround = fptrunc double %a to float
%fneg = fneg float %fpround
store volatile float %fneg, float addrspace(1)* %out.gep
store volatile float %fpround, float addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f32_to_f16:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%fneg.a = fneg float %a
%fpround = fptrunc float %fneg.a to half
%fneg = fsub half -0.000000e+00, %fpround
store volatile half %fneg, half addrspace(1)* %out.gep
store volatile float %fneg.a, float addrspace(1)* undef
ret void
}
; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f32_to_f16:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN-DAG: v_mul_f32_e64 [[USE1:v[0-9]+]], -[[A]], s
; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[USE1]]
define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr, float %c) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%fneg.a = fneg float %a
%fpround = fptrunc float %fneg.a to half
%fneg = fsub half -0.000000e+00, %fpround
%use1 = fmul float %fneg.a, %c
store volatile half %fneg, half addrspace(1)* %out.gep
store volatile float %use1, float addrspace(1)* undef
ret void
}
; --------------------------------------------------------------------------------
; rcp tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_rcp_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rcp_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_rcp_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%rcp = call float @llvm.amdgcn.rcp.f32(float %a)
%fneg = fneg float %rcp
store float %fneg, float addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_rcp_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_rcp_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%fneg.a = fneg float %a
%rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
%fneg = fneg float %rcp
store float %fneg, float addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_rcp_store_use_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
define amdgpu_kernel void @v_fneg_rcp_store_use_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%fneg.a = fneg float %a
%rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
%fneg = fneg float %rcp
store volatile float %fneg, float addrspace(1)* %out.gep
store volatile float %fneg.a, float addrspace(1)* undef
ret void
}
; GCN-LABEL: {{^}}v_fneg_rcp_multi_use_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_rcp_multi_use_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float %c) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%fneg.a = fneg float %a
%rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
%fneg = fneg float %rcp
%use1 = fmul float %fneg.a, %c
store volatile float %fneg, float addrspace(1)* %out.gep
store volatile float %use1, float addrspace(1)* undef
ret void
}
; --------------------------------------------------------------------------------
; fmul_legacy tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_mul_legacy_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
%fneg = fneg float %mul
store float %fneg, float addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_mul_legacy_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_xor_b32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @v_fneg_mul_legacy_store_use_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
%fneg = fneg float %mul
store volatile float %fneg, float addrspace(1)* %out
store volatile float %mul, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_mul_legacy_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[ADD]], 4.0
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
%fneg = fneg float %mul
%use1 = call float @llvm.amdgcn.fmul.legacy(float %mul, float 4.0)
store volatile float %fneg, float addrspace(1)* %out
store volatile float %use1, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_mul_legacy_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%fneg.a = fneg float %a
%mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
%fneg = fneg float %mul
store volatile float %fneg, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}v_fneg_mul_legacy_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_mul_legacy_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%fneg.b = fneg float %b
%mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %fneg.b)
%fneg = fneg float %mul
store volatile float %fneg, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_mul_legacy_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%fneg.a = fneg float %a
%fneg.b = fneg float %b
%mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %fneg.b)
%fneg = fneg float %mul
store volatile float %fneg, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[A]], [[B]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
define amdgpu_kernel void @v_fneg_mul_legacy_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%fneg.a = fneg float %a
%mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
%fneg = fneg float %mul
store volatile float %fneg, float addrspace(1)* %out
store volatile float %fneg.a, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%fneg.a = fneg float %a
%mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
%fneg = fneg float %mul
%use1 = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %c)
store volatile float %fneg, float addrspace(1)* %out
store volatile float %use1, float addrspace(1)* %out
ret void
}
; --------------------------------------------------------------------------------
; sin tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_sin_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 0xbe22f983, [[A]]
; GCN: v_fract_f32_e32 [[FRACT:v[0-9]+]], [[MUL]]
; GCN: v_sin_f32_e32 [[RESULT:v[0-9]+]], [[FRACT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_sin_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%sin = call float @llvm.sin.f32(float %a)
%fneg = fneg float %sin
store float %fneg, float addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_amdgcn_sin_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_sin_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_amdgcn_sin_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%sin = call float @llvm.amdgcn.sin.f32(float %a)
%fneg = fneg float %sin
store float %fneg, float addrspace(1)* %out.gep
ret void
}
; --------------------------------------------------------------------------------
; ftrunc tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_trunc_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_trunc_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_trunc_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%trunc = call float @llvm.trunc.f32(float %a)
%fneg = fneg float %trunc
store float %fneg, float addrspace(1)* %out.gep
ret void
}
; --------------------------------------------------------------------------------
; fround tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_round_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_trunc_f32_e32
; GCN: v_sub_f32_e32
; GCN: v_cndmask_b32
; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN-SAFE: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -v{{[0-9]+}}, v{{[0-9]+}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_round_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%round = call float @llvm.round.f32(float %a)
%fneg = fneg float %round
store float %fneg, float addrspace(1)* %out.gep
ret void
}
; --------------------------------------------------------------------------------
; rint tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_rint_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_rint_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%rint = call float @llvm.rint.f32(float %a)
%fneg = fneg float %rint
store float %fneg, float addrspace(1)* %out.gep
ret void
}
; --------------------------------------------------------------------------------
; nearbyint tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_nearbyint_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_nearbyint_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%nearbyint = call float @llvm.nearbyint.f32(float %a)
%fneg = fneg float %nearbyint
store float %fneg, float addrspace(1)* %out.gep
ret void
}
; --------------------------------------------------------------------------------
; fcanonicalize tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_canonicalize_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], -1.0, [[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_canonicalize_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%trunc = call float @llvm.canonicalize.f32(float %a)
%fneg = fneg float %trunc
store float %fneg, float addrspace(1)* %out.gep
ret void
}
; --------------------------------------------------------------------------------
; vintrp tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_interp_p1_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
; GCN: v_interp_p1_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
; GCN: v_interp_p1_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
define amdgpu_kernel void @v_fneg_interp_p1_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%mul = fmul float %a, %b
%fneg = fneg float %mul
%intrp0 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 0, i32 0, i32 0)
%intrp1 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 1, i32 0, i32 0)
store volatile float %intrp0, float addrspace(1)* %out.gep
store volatile float %intrp1, float addrspace(1)* %out.gep
ret void
}
; GCN-LABEL: {{^}}v_fneg_interp_p2_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
; GCN: v_interp_p2_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
; GCN: v_interp_p2_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
define amdgpu_kernel void @v_fneg_interp_p2_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%mul = fmul float %a, %b
%fneg = fneg float %mul
%intrp0 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 0, i32 0, i32 0)
%intrp1 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 1, i32 0, i32 0)
store volatile float %intrp0, float addrspace(1)* %out.gep
store volatile float %intrp1, float addrspace(1)* %out.gep
ret void
}
; --------------------------------------------------------------------------------
; CopyToReg tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_copytoreg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: v_mul_f32_e32 [[MUL0:v[0-9]+]], [[A]], [[B]]
; GCN: s_cbranch_scc0
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
; GCN: s_endpgm
; GCN: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x80000000, [[MUL0]]
; GCN: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[XOR]], [[C]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
define amdgpu_kernel void @v_fneg_copytoreg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%mul = fmul float %a, %b
%fneg = fneg float %mul
%cmp0 = icmp eq i32 %d, 0
br i1 %cmp0, label %if, label %endif
if:
%mul1 = fmul float %fneg, %c
store volatile float %mul1, float addrspace(1)* %out.gep
br label %endif
endif:
store volatile float %mul, float addrspace(1)* %out.gep
ret void
}
; --------------------------------------------------------------------------------
; inlineasm tests
; --------------------------------------------------------------------------------
; Can't fold into use, so should fold into source
; GCN-LABEL: {{^}}v_fneg_inlineasm_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
; GCN: ; use [[MUL]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_inlineasm_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%mul = fmul float %a, %b
%fneg = fneg float %mul
call void asm sideeffect "; use $0", "v"(float %fneg) #0
store volatile float %fneg, float addrspace(1)* %out.gep
ret void
}
; --------------------------------------------------------------------------------
; inlineasm tests
; --------------------------------------------------------------------------------
; Can't fold into use, so should fold into source
; GCN-LABEL: {{^}}v_fneg_inlineasm_multi_use_src_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
; GCN: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[MUL]]
; GCN: ; use [[NEG]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_inlineasm_multi_use_src_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%mul = fmul float %a, %b
%fneg = fneg float %mul
call void asm sideeffect "; use $0", "v"(float %fneg) #0
store volatile float %mul, float addrspace(1)* %out.gep
ret void
}
; --------------------------------------------------------------------------------
; code size regression tests
; --------------------------------------------------------------------------------
; There are multiple users of the fneg that must use a VOP3
; instruction, so there is no penalty
; GCN-LABEL: {{^}}multiuse_fneg_2_vop3_users_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[A]], [[B]], [[C]]
; GCN-NEXT: v_fma_f32 [[FMA1:v[0-9]+]], -[[A]], [[C]], 2.0
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @multiuse_fneg_2_vop3_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%fneg.a = fneg float %a
%fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
%fma1 = call float @llvm.fma.f32(float %fneg.a, float %c, float 2.0)
store volatile float %fma0, float addrspace(1)* %out
store volatile float %fma1, float addrspace(1)* %out
ret void
}
; There are multiple users, but both require using a larger encoding
; for the modifier.
; GCN-LABEL: {{^}}multiuse_fneg_2_vop2_users_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: v_mul_f32_e64 [[MUL0:v[0-9]+]], -[[A]], [[B]]
; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[A]], [[C]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @multiuse_fneg_2_vop2_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%fneg.a = fneg float %a
%mul0 = fmul float %fneg.a, %b
%mul1 = fmul float %fneg.a, %c
store volatile float %mul0, float addrspace(1)* %out
store volatile float %mul1, float addrspace(1)* %out
ret void
}
; One user is VOP3 so has no cost to folding the modifier, the other does.
; GCN-LABEL: {{^}}multiuse_fneg_vop2_vop3_users_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[A]], [[B]], 2.0
; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[A]], [[C]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @multiuse_fneg_vop2_vop3_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%fneg.a = fneg float %a
%fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float 2.0)
%mul1 = fmul float %fneg.a, %c
store volatile float %fma0, float addrspace(1)* %out
store volatile float %mul1, float addrspace(1)* %out
ret void
}
; The use of the fneg requires a code size increase, but folding into
; the source does not
; GCN-LABEL: {{^}}free_fold_src_code_size_cost_use_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[D:v[0-9]+]]
; GCN-SAFE: v_fma_f32 [[FMA0:v[0-9]+]], [[A]], [[B]], 2.0
; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[FMA0]], [[C]]
; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL2:v[0-9]+]], -[[FMA0]], [[D]]
; GCN-NSZ: v_fma_f32 [[FMA0:v[0-9]+]], [[A]], -[[B]], -2.0
; GCN-NSZ-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[FMA0]], [[C]]
; GCN-NSZ-DAG: v_mul_f32_e32 [[MUL2:v[0-9]+]], [[FMA0]], [[D]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL2]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @free_fold_src_code_size_cost_use_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
%d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%d = load volatile float, float addrspace(1)* %d.gep
%fma0 = call float @llvm.fma.f32(float %a, float %b, float 2.0)
%fneg.fma0 = fneg float %fma0
%mul1 = fmul float %fneg.fma0, %c
%mul2 = fmul float %fneg.fma0, %d
store volatile float %mul1, float addrspace(1)* %out
store volatile float %mul2, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}free_fold_src_code_size_cost_use_f64:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN: {{buffer|flat}}_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
; GCN: {{buffer|flat}}_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]]
; GCN: {{buffer|flat}}_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]]
; GCN: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], 2.0
; GCN-DAG: v_mul_f64 [[MUL0:v\[[0-9]+:[0-9]+\]]], -[[FMA0]], [[C]]
; GCN-DAG: v_mul_f64 [[MUL1:v\[[0-9]+:[0-9]+\]]], -[[FMA0]], [[D]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @free_fold_src_code_size_cost_use_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr, double addrspace(1)* %b.ptr, double addrspace(1)* %c.ptr, double addrspace(1)* %d.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds double, double addrspace(1)* %b.ptr, i64 %tid.ext
%c.gep = getelementptr inbounds double, double addrspace(1)* %c.ptr, i64 %tid.ext
%d.gep = getelementptr inbounds double, double addrspace(1)* %d.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
%a = load volatile double, double addrspace(1)* %a.gep
%b = load volatile double, double addrspace(1)* %b.gep
%c = load volatile double, double addrspace(1)* %c.gep
%d = load volatile double, double addrspace(1)* %d.gep
%fma0 = call double @llvm.fma.f64(double %a, double %b, double 2.0)
%fneg.fma0 = fsub double -0.0, %fma0
%mul1 = fmul double %fneg.fma0, %c
%mul2 = fmul double %fneg.fma0, %d
store volatile double %mul1, double addrspace(1)* %out
store volatile double %mul2, double addrspace(1)* %out
ret void
}
; %trunc.a has one fneg use, but it requires a code size increase and
; %the fneg can instead be folded for free into the fma.
; GCN-LABEL: {{^}}one_use_cost_to_fold_into_src_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: v_trunc_f32_e32 [[TRUNC_A:v[0-9]+]], [[A]]
; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[TRUNC_A]], [[B]], [[C]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
define amdgpu_kernel void @one_use_cost_to_fold_into_src_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
%d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%d = load volatile float, float addrspace(1)* %d.gep
%trunc.a = call float @llvm.trunc.f32(float %a)
%trunc.fneg.a = fneg float %trunc.a
%fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
store volatile float %fma0, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}multi_use_cost_to_fold_into_src:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[D:v[0-9]+]]
; GCN: v_trunc_f32_e32 [[TRUNC_A:v[0-9]+]], [[A]]
; GCN-DAG: v_fma_f32 [[FMA0:v[0-9]+]], -[[TRUNC_A]], [[B]], [[C]]
; GCN-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[TRUNC_A]], [[D]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
define amdgpu_kernel void @multi_use_cost_to_fold_into_src(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
%c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
%d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%d = load volatile float, float addrspace(1)* %d.gep
%trunc.a = call float @llvm.trunc.f32(float %a)
%trunc.fneg.a = fneg float %trunc.a
%fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
%mul1 = fmul float %trunc.a, %d
store volatile float %fma0, float addrspace(1)* %out
store volatile float %mul1, float addrspace(1)* %out
ret void
}
; The AMDGPU combine to pull fneg into the FMA operands was being
; undone by the generic combine to pull the fneg out of the fma if
; !isFNegFree. We were reporting false for v2f32 even though it will
; be split into f32 where it will be free.
; GCN-LABEL: {{^}}fneg_fma_fneg_dagcombine_loop:
; GCN: s_brev_b32 [[NEGZERO:s[0-9]+]], 1{{$}}
; GCN-DAG: v_fma_f32 [[FMA0:v[0-9]+]], v2, -v4, [[NEGZERO]]
; GCN-DAG: v_fma_f32 [[FMA1:v[0-9]+]], v3, -v5, [[NEGZERO]]
; GCN-DAG: v_sub_f32_e32 [[SUB0:v[0-9]+]], [[FMA0]], v0
; GCN-DAG: v_sub_f32_e32 [[SUB1:v[0-9]+]], [[FMA1]], v1
; GCN-DAG: v_mul_f32_e32 v0, [[SUB0]], v4
; GCN-DAG: v_mul_f32_e32 v1, [[SUB1]], v5
; GCN: s_setpc_b64
define <2 x float> @fneg_fma_fneg_dagcombine_loop(<2 x float> %arg, <2 x float> %arg1, <2 x float> %arg2) #0 {
bb:
%i3 = call fast <2 x float> @llvm.fma.v2f32(<2 x float> %arg1, <2 x float> %arg2, <2 x float> zeroinitializer)
%i4 = fadd fast <2 x float> %i3, %arg
%i5 = fneg <2 x float> %i4
%i6 = fmul fast <2 x float> %i5, %arg2
ret <2 x float> %i6
}
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare float @llvm.fma.f32(float, float, float) #1
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare float @llvm.fmuladd.f32(float, float, float) #1
declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
declare float @llvm.sin.f32(float) #1
declare float @llvm.trunc.f32(float) #1
declare float @llvm.round.f32(float) #1
declare float @llvm.rint.f32(float) #1
declare float @llvm.nearbyint.f32(float) #1
declare float @llvm.canonicalize.f32(float) #1
declare float @llvm.minnum.f32(float, float) #1
declare float @llvm.maxnum.f32(float, float) #1
declare half @llvm.minnum.f16(half, half) #1
declare double @llvm.minnum.f64(double, double) #1
declare double @llvm.fma.f64(double, double, double) #1
declare float @llvm.amdgcn.sin.f32(float) #1
declare float @llvm.amdgcn.rcp.f32(float) #1
declare float @llvm.amdgcn.rcp.legacy(float) #1
declare float @llvm.amdgcn.fmul.legacy(float, float) #1
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind "unsafe-fp-math"="true" }
attributes #3 = { nounwind "no-signed-zeros-fp-math"="true" }