mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
AMDGPU: Fix -enable-var-scope violations
llvm-svn: 318004
This commit is contained in:
parent
d207bf89ab
commit
d9c8715837
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #0
|
||||
@ -83,7 +83,8 @@ define amdgpu_kernel void @s_and_multi_use_constant_i32_0(i32 addrspace(1)* %out
|
||||
; SI: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, [[K]]
|
||||
; SI: s_add_i32
|
||||
; SI: s_add_i32 [[ADD:s[0-9]+]], s{{[0-9]+}}, [[K]]
|
||||
; SI: buffer_store_dword [[VK]]
|
||||
; SI: v_mov_b32_e32 [[VADD:v[0-9]+]], [[ADD]]
|
||||
; SI: buffer_store_dword [[VADD]]
|
||||
define amdgpu_kernel void @s_and_multi_use_constant_i32_1(i32 addrspace(1)* %out, i32 %a, i32 %b) {
|
||||
%and = and i32 %a, 1234567
|
||||
%foo = add i32 %and, 1234567
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
; GCN-LABEL: {{^}}br_cc_f16:
|
||||
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
|
||||
@ -50,7 +50,7 @@ two:
|
||||
; VI: s_cbranch_vccnz
|
||||
|
||||
; GCN: one{{$}}
|
||||
; VI: v_mov_b32_e32 v[[A_F16:[0-9]+]], 0x380{{0|1}}{{$}}
|
||||
; GCN: v_mov_b32_e32 v[[A_F16:[0-9]+]], 0x380{{0|1}}{{$}}
|
||||
|
||||
; SI: buffer_store_short v[[A_F16]]
|
||||
; SI: s_endpgm
|
||||
@ -89,7 +89,7 @@ two:
|
||||
; SI: v_cvt_f16_f32_e32 v[[A_F16:[0-9]+]], v[[A_F32]]
|
||||
|
||||
; GCN: two{{$}}
|
||||
; VI: v_mov_b32_e32 v[[B_F16:[0-9]+]], 0x3800{{$}}
|
||||
; GCN: v_mov_b32_e32 v[[B_F16:[0-9]+]], 0x3800{{$}}
|
||||
; GCN: buffer_store_short v[[B_F16]]
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @br_cc_f16_imm_b(
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -O0 -mtriple=amdgcn--amdhsa -march=amdgcn -amdgpu-spill-sgpr-to-vgpr=0 -verify-machineinstrs < %s | FileCheck -check-prefix=VMEM -check-prefix=GCN %s
|
||||
; RUN: llc -O0 -mtriple=amdgcn--amdhsa -march=amdgcn -amdgpu-spill-sgpr-to-vgpr=1 -verify-machineinstrs < %s | FileCheck -check-prefix=VGPR -check-prefix=GCN %s
|
||||
; RUN: llc -O0 -mtriple=amdgcn--amdhsa -march=amdgcn -amdgpu-spill-sgpr-to-vgpr=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=VMEM -check-prefix=GCN %s
|
||||
; RUN: llc -O0 -mtriple=amdgcn--amdhsa -march=amdgcn -amdgpu-spill-sgpr-to-vgpr=1 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=VGPR -check-prefix=GCN %s
|
||||
|
||||
; Verify registers used for tracking exec mask changes when all
|
||||
; registers are spilled at the end of the block. The SGPR spill
|
||||
@ -171,7 +171,7 @@ end:
|
||||
; GCN: {{^}}; BB#0:
|
||||
|
||||
; GCN: s_mov_b32 m0, -1
|
||||
; VMEM: ds_read_b32 [[LOAD0:v[0-9]+]]
|
||||
; GCN: ds_read_b32 [[LOAD0:v[0-9]+]]
|
||||
|
||||
; GCN: v_cmp_ne_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], v0,
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=VI %s
|
||||
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=FUNC -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=FUNC -check-prefix=VI %s
|
||||
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
|
||||
declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) nounwind readnone
|
||||
@ -308,7 +308,7 @@ define amdgpu_kernel void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %o
|
||||
; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xd
|
||||
; VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x34
|
||||
; GCN: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]]
|
||||
; GCN: v_mov_b32_e32 [[RESULT]], [[SRESULT]]
|
||||
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], [[SRESULT]]
|
||||
; GCN: buffer_store_dword [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
; EG: BCNT_INT
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CI %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s
|
||||
|
||||
; DAGCombiner will transform:
|
||||
; (fabs (f16 bitcast (i16 a))) => (f16 bitcast (and (i16 a), 0x7FFF))
|
||||
@ -20,7 +20,7 @@ define amdgpu_kernel void @s_fabs_free_f16(half addrspace(1)* %out, i16 %in) {
|
||||
|
||||
; GCN-LABEL: {{^}}s_fabs_f16:
|
||||
; CI: flat_load_ushort [[VAL:v[0-9]+]],
|
||||
; CI: v_and_b32_e32 [[CVT0:v[0-9]+]], 0x7fff, [[VAL]]
|
||||
; CI: v_and_b32_e32 [[RESULT:v[0-9]+]], 0x7fff, [[VAL]]
|
||||
; CI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
define amdgpu_kernel void @s_fabs_f16(half addrspace(1)* %out, half %in) {
|
||||
%fabs = call half @llvm.fabs.f16(half %in)
|
||||
|
@ -1,7 +1,7 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GCN-FLUSH %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=-fp32-denormals,+fp-exceptions < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-EXCEPT -check-prefix=VI -check-prefix=GCN-FLUSH %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs -mattr=+fp32-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX9-DENORM %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GCN-FLUSH %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,GCN-FLUSH %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=-fp32-denormals,+fp-exceptions < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-EXCEPT,VI,GCN-FLUSH %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs -mattr=+fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-DENORM,GCN-DENORM %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-FLUSH,GCN-FLUSH %s
|
||||
|
||||
; GCN-LABEL: {{^}}test_no_fold_canonicalize_loaded_value_f32:
|
||||
; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
|
||||
@ -381,9 +381,9 @@ define amdgpu_kernel void @test_fold_canonicalize_minnum_value_f32(float addrspa
|
||||
|
||||
; GCN-LABEL: test_fold_canonicalize_sNaN_value_f32:
|
||||
; GCN: v_min_f32_e32 [[V0:v[0-9]+]], 0x7f800001, v{{[0-9]+}}
|
||||
; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, [[V0]]
|
||||
; GCN-DENORM: v_max_f32_e32 v{{[0-9]+}}, [[V0]], [[V0]]
|
||||
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
|
||||
; GCN-FLUSH: v_mul_f32_e32 [[RESULT:v[0-9]+]], 1.0, [[V0]]
|
||||
; GCN-DENORM: v_max_f32_e32 [[RESULT:v[0-9]+]], [[V0]], [[V0]]
|
||||
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[RESULT]]
|
||||
define amdgpu_kernel void @test_fold_canonicalize_sNaN_value_f32(float addrspace(1)* %arg) {
|
||||
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
|
||||
@ -395,10 +395,10 @@ define amdgpu_kernel void @test_fold_canonicalize_sNaN_value_f32(float addrspace
|
||||
}
|
||||
|
||||
; GCN-LABEL: test_fold_canonicalize_denorm_value_f32:
|
||||
; GFX9: v_min_f32_e32 [[V:v[0-9]+]], 0x7fffff, v{{[0-9]+}}
|
||||
; GFX9: v_min_f32_e32 [[RESULT:v[0-9]+]], 0x7fffff, v{{[0-9]+}}
|
||||
; VI: v_min_f32_e32 [[V0:v[0-9]+]], 0x7fffff, v{{[0-9]+}}
|
||||
; VI: v_mul_f32_e32 v{{[0-9]+}}, 1.0, [[V0]]
|
||||
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
|
||||
; VI: v_mul_f32_e32 [[RESULT:v[0-9]+]], 1.0, [[V0]]
|
||||
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[RESULT]]
|
||||
; GFX9-NOT: 1.0
|
||||
define amdgpu_kernel void @test_fold_canonicalize_denorm_value_f32(float addrspace(1)* %arg) {
|
||||
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
@ -411,10 +411,10 @@ define amdgpu_kernel void @test_fold_canonicalize_denorm_value_f32(float addrspa
|
||||
}
|
||||
|
||||
; GCN-LABEL: test_fold_canonicalize_maxnum_value_from_load_f32:
|
||||
; GFX9: v_max_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
|
||||
; GFX9: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, v{{[0-9]+}}
|
||||
; VI: v_max_f32_e32 [[V0:v[0-9]+]], 0, v{{[0-9]+}}
|
||||
; VI: v_mul_f32_e32 v{{[0-9]+}}, 1.0, [[V0]]
|
||||
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
|
||||
; VI: v_mul_f32_e32 [[RESULT:v[0-9]+]], 1.0, [[V0]]
|
||||
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[RESULT]]
|
||||
; GFX9-NOT: 1.0
|
||||
define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_from_load_f32(float addrspace(1)* %arg) {
|
||||
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast -enable-no-infs-fp-math -enable-unsafe-fp-math -mattr=+fp32-denormals < %s | FileCheck -check-prefix=SI-FMA -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast -enable-no-infs-fp-math -enable-unsafe-fp-math -mattr=+fp32-denormals < %s | FileCheck -enable-var-scope -check-prefix=SI-FMA -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
; Note: The SI-FMA conversions of type x * (y + 1) --> x * y + x would be
|
||||
; beneficial even without fp32 denormals, but they do require no-infs-fp-math
|
||||
@ -237,6 +237,7 @@ define amdgpu_kernel void @combine_to_fma_fsub_2_f64(double addrspace(1)* noalia
|
||||
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
|
||||
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
|
||||
; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
|
||||
; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]]
|
||||
; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[D]]
|
||||
; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
@ -271,6 +272,7 @@ define amdgpu_kernel void @combine_to_fma_fsub_2_f64_2uses_neg(double addrspace(
|
||||
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
|
||||
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
|
||||
; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
|
||||
; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]]
|
||||
; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[D]]
|
||||
; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
|
@ -1,9 +1,9 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=NOSNAN -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -check-prefix=SNAN -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=NOSNAN -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -check-prefix=SNAN -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=NOSNAN -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -check-prefix=SNAN -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=NOSNAN -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SNAN -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=NOSNAN -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SNAN -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=NOSNAN -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SNAN -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s
|
||||
|
||||
|
||||
; GCN-LABEL: {{^}}v_test_nnan_input_fmed3_r_i_i_f32:
|
||||
@ -899,7 +899,7 @@ define amdgpu_kernel void @v_test_global_nnans_min_max_f32(float addrspace(1)* %
|
||||
; VI: v_max_f16_e32 v{{[0-9]+}}, 2.0
|
||||
; VI: v_min_f16_e32 v{{[0-9]+}}, 4.0
|
||||
|
||||
; GFX9: v_add_f16_e32 v{{[0-9]+}}, 1.0
|
||||
; GFX9: v_add_f16_e32 [[ADD:v[0-9]+]], 1.0
|
||||
; GFX9: v_med3_f16 v{{[0-9]+}}, [[ADD]], 2.0, 4.0
|
||||
define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #1 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
@ -1,12 +1,12 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s
|
||||
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
declare half @llvm.fmuladd.f16(half, half, half) #1
|
||||
@ -441,7 +441,7 @@ define amdgpu_kernel void @fsub_c_fadd_a_a_f16(half addrspace(1)* %out, half add
|
||||
|
||||
; VI-FLUSH: v_mad_f16 [[RESULT:v[0-9]+]], [[R1]], 2.0, -[[R2]]
|
||||
|
||||
; VI-DENORM-CONTRACT: v_fma_f16 [[R2]], [[R1]], 2.0, -[[R2]]
|
||||
; VI-DENORM-CONTRACT: v_fma_f16 [[RESULT:v[0-9]+]], [[R1]], 2.0, -[[R2]]
|
||||
|
||||
; VI-DENORM-STRICT: v_add_f16_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
|
||||
; VI-DENORM-STRICT: v_sub_f16_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -start-after=sink -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=tahiti -start-after=sink -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=tahiti -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
; --------------------------------------------------------------------------------
|
||||
; fadd tests
|
||||
@ -59,7 +59,7 @@ define amdgpu_kernel void @v_fneg_add_store_use_add_f32(float addrspace(1)* %out
|
||||
; GCN-SAFE: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[ADD]]
|
||||
|
||||
; GCN-NSZ: v_sub_f32_e64 [[NEG_ADD:v[0-9]+]], -[[A]], [[B]]
|
||||
; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[ADD]]
|
||||
; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_ADD]]
|
||||
; GCN: buffer_store_dword [[NEG_ADD]]
|
||||
; GCN-NEXT: buffer_store_dword [[MUL]]
|
||||
define amdgpu_kernel void @v_fneg_add_multi_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
|
||||
@ -513,7 +513,7 @@ define amdgpu_kernel void @v_fneg_0_minnum_foldable_use_f32(float addrspace(1)*
|
||||
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
|
||||
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
|
||||
; GCN: v_max_f32_e64 [[MAX0:v[0-9]+]], -[[A]], -[[B]]
|
||||
; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MUL0]]
|
||||
; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]]
|
||||
; GCN-NEXT: buffer_store_dword [[MAX0]]
|
||||
; GCN-NEXT: buffer_store_dword [[MUL1]]
|
||||
define amdgpu_kernel void @v_fneg_minnum_multi_use_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
|
||||
@ -660,7 +660,7 @@ define amdgpu_kernel void @v_fneg_0_maxnum_foldable_use_f32(float addrspace(1)*
|
||||
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
|
||||
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
|
||||
; GCN: v_min_f32_e64 [[MAX0:v[0-9]+]], -[[A]], -[[B]]
|
||||
; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MUL0]]
|
||||
; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]]
|
||||
; GCN-NEXT: buffer_store_dword [[MAX0]]
|
||||
; GCN-NEXT: buffer_store_dword [[MUL1]]
|
||||
define amdgpu_kernel void @v_fneg_maxnum_multi_use_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s
|
||||
|
||||
; GCN-LABEL: {{^}}fpext_f16_to_f32
|
||||
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
|
||||
@ -180,7 +180,7 @@ entry:
|
||||
; GCN-DAG: v_and_b32_e32 [[XOR:v[0-9]+]], 0x7fff, [[A]]
|
||||
|
||||
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[A]]
|
||||
; VI-DAG: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], |[[A]]|
|
||||
; GFX89-DAG: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], |[[A]]|
|
||||
|
||||
; GCN: store_dword [[CVT]]
|
||||
; GCN: store_short [[XOR]]
|
||||
@ -226,7 +226,7 @@ entry:
|
||||
; GCN-DAG: v_or_b32_e32 [[OR:v[0-9]+]], 0x8000, [[A]]
|
||||
|
||||
; SI: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[OR]]
|
||||
; VI-DAG: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -|[[OR]]|
|
||||
; GFX89-DAG: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -|[[OR]]|
|
||||
|
||||
; GCN: buffer_store_dword [[CVT]]
|
||||
; GCN: buffer_store_short [[OR]]
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=SIVI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=SIVI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s
|
||||
|
||||
; GCN-LABEL: {{^}}fptrunc_f32_to_f16:
|
||||
; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
|
||||
@ -65,6 +65,9 @@ entry:
|
||||
; GCN-DAG: v_cvt_f32_f64_e32 v[[A_F32_0:[0-9]+]], v{{\[}}[[A_F64_0]]:{{[0-9]+}}{{\]}}
|
||||
; GCN-DAG: v_cvt_f32_f64_e32 v[[A_F32_1:[0-9]+]], v{{\[}}{{[0-9]+}}:[[A_F64_3]]{{\]}}
|
||||
; GCN-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[A_F32_0]]
|
||||
;
|
||||
; SI-DAG: v_cvt_f16_f32_e32 v[[CVTHI:[0-9]+]], v[[A_F32_1]]
|
||||
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[CVTHI]]
|
||||
|
||||
; VI: v_cvt_f16_f32_sdwa v[[R_F16_HI:[0-9]+]], v[[A_F32_1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
; Use a 64-bit value with lo bits that can be represented as an inline constant
|
||||
; GCN-LABEL: {{^}}i64_imm_inline_lo:
|
||||
@ -129,7 +129,7 @@ define amdgpu_kernel void @store_inline_imm_inv_2pi_f32(float addrspace(1)* %out
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_inline_imm_m_inv_2pi_f32:
|
||||
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbe22f983{{$}}
|
||||
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbe22f983{{$}}
|
||||
; GCN: buffer_store_dword [[REG]]
|
||||
define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f32(float addrspace(1)* %out) {
|
||||
store float 0xBFC45F3060000000, float addrspace(1)* %out
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=gfx901 -enable-amdgpu-aa=0 -mattr=+flat-for-global,-fp64-fp16-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s
|
||||
; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=fiji -enable-amdgpu-aa=0 -mattr=+flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=CIVI -check-prefix=VI -check-prefix=GFX89 %s
|
||||
; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=hawaii -enable-amdgpu-aa=0 -mattr=+flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=CIVI -check-prefix=CI %s
|
||||
; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=gfx901 -enable-amdgpu-aa=0 -mattr=+flat-for-global,-fp64-fp16-denormals < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s
|
||||
; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=fiji -enable-amdgpu-aa=0 -mattr=+flat-for-global < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CIVI -check-prefix=VI -check-prefix=GFX89 %s
|
||||
; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=hawaii -enable-amdgpu-aa=0 -mattr=+flat-for-global < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CIVI -check-prefix=CI %s
|
||||
|
||||
; GCN-LABEL: {{^}}s_insertelement_v2i16_0:
|
||||
; GCN: s_load_dword [[VEC:s[0-9]+]]
|
||||
@ -81,8 +81,9 @@ define amdgpu_kernel void @s_insertelement_v2i16_0_reghi(<2 x i16> addrspace(1)*
|
||||
; GCN: s_load_dword [[ELT_ARG:s[0-9]+]], s[0:1]
|
||||
; GCN: s_load_dword [[VEC:s[0-9]+]],
|
||||
|
||||
; CIVI-DAG: s_and_b32 [[ELT1:s[0-9]+]], [[VEC]], 0xffff0000{{$}}
|
||||
; CIVI: s_or_b32 s{{[0-9]+}}, [[ELT0]], [[ELT1]]
|
||||
; CIVI-DAG: s_lshr_b32 [[ELT1:s[0-9]+]], [[ELT_ARG]], 16
|
||||
; CIVI-DAG: s_and_b32 [[ELT0:s[0-9]+]], [[VEC]], 0xffff0000{{$}}
|
||||
; CIVI: s_or_b32 s{{[0-9]+}}, [[ELT1]], [[ELT0]]
|
||||
|
||||
; GFX9: s_lshr_b32 [[ELT1:s[0-9]+]], [[ELT_ARG]], 16
|
||||
; GFX9: s_pack_lh_b32_b16 s{{[0-9]+}}, [[ELT1]], [[VEC]]
|
||||
@ -174,7 +175,7 @@ define amdgpu_kernel void @s_insertelement_v2f16_0(<2 x half> addrspace(1)* %out
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}s_insertelement_v2f16_1:
|
||||
; GFX9: s_load_dword [[VEC:s[0-9]+]]
|
||||
; GCN: s_load_dword [[VEC:s[0-9]+]]
|
||||
; GCN-NOT: s_lshr
|
||||
|
||||
; CIVI: s_and_b32 [[ELT0:s[0-9]+]], [[VEC]], 0xffff{{$}}
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
declare half @llvm.fma.f16(half %a, half %b, half %c)
|
||||
declare <2 x half> @llvm.fma.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
|
||||
@ -253,7 +253,7 @@ define amdgpu_kernel void @fma_v2f16_imm_b(
|
||||
; VI-DAG: v_fma_f16 v[[R_F16_1:[0-9]+]], v[[A_F16_1]], v[[B_F16_1]], v[[C_F16]]
|
||||
|
||||
; GCN-NOT: and
|
||||
; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
|
||||
; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_1]]
|
||||
; GCN: buffer_store_dword v[[R_V2_F16]]
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @fma_v2f16_imm_c(
|
||||
|
@ -1,7 +1,7 @@
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s
|
||||
|
||||
declare half @llvm.fmuladd.f16(half %a, half %b, half %c)
|
||||
declare <2 x half> @llvm.fmuladd.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
|
||||
@ -70,7 +70,7 @@ define amdgpu_kernel void @fmuladd_f16_imm_a(
|
||||
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
|
||||
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
|
||||
; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
|
||||
; SI: v_mac_f32_e32 v[[C_F32]], 0x40400000, v[[B_F32]]
|
||||
; SI: v_mac_f32_e32 v[[C_F32]], 0x40400000, v[[A_F32]]
|
||||
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[C_F32]]
|
||||
; SI: buffer_store_short v[[R_F16]]
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
declare half @llvm.maxnum.f16(half %a, half %b)
|
||||
declare <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> %b)
|
||||
@ -137,8 +137,9 @@ entry:
|
||||
; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]]
|
||||
; SI: v_max_f32_e32 v[[R_F32_1:[0-9]+]], 0x40400000, v[[A_F32_1]]
|
||||
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
|
||||
|
||||
; VI-DAG: v_mov_b32_e32 [[CONST3:v[0-9]+]], 0x4200
|
||||
; VI-DAG: v_max_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], [[CONST3]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
||||
; VI-DAG: v_max_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], [[CONST3]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
||||
; VI-DAG: v_max_f16_e32 v[[R_F16_0:[0-9]+]], 4.0, v[[A_V2_F16]]
|
||||
|
||||
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
declare half @llvm.minnum.f16(half %a, half %b)
|
||||
declare <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b)
|
||||
@ -139,7 +139,7 @@ entry:
|
||||
; SI: v_min_f32_e32 v[[R_F32_1:[0-9]+]], 0x40400000, v[[A_F32_1]]
|
||||
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
|
||||
; VI-DAG: v_mov_b32_e32 [[CONST3:v[0-9]+]], 0x4200
|
||||
; VI-DAG: v_min_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], [[CONST3]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
||||
; VI-DAG: v_min_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], [[CONST3]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
||||
; VI-DAG: v_min_f16_e32 v[[R_F16_0:[0-9]+]], 4.0, v[[A_V2_F16]]
|
||||
|
||||
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
|
||||
|
@ -1,12 +1,12 @@
|
||||
; Make sure we still form mad even when unsafe math or fp-contract is allowed instead of fma.
|
||||
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-UNSAFE -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-UNSAFE -check-prefix=FUNC %s
|
||||
|
||||
; Make sure we don't form mad with denormals
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-FASTFMAF -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-SLOWFMAF -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-FASTFMAF -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-SLOWFMAF -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
declare float @llvm.fabs.f32(float) #0
|
||||
@ -231,6 +231,7 @@ define amdgpu_kernel void @combine_to_mad_fsub_1_f32(float addrspace(1)* noalias
|
||||
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
|
||||
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
|
||||
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
|
||||
|
||||
; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], [[C]]
|
||||
; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], [[D]]
|
||||
@ -305,6 +306,7 @@ define amdgpu_kernel void @combine_to_mad_fsub_2_f32(float addrspace(1)* noalias
|
||||
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
|
||||
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
|
||||
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
|
||||
|
||||
; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], [[A]], -[[B]], -[[C]]
|
||||
; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], -[[B]], -[[D]]
|
||||
@ -348,6 +350,7 @@ define amdgpu_kernel void @combine_to_mad_fsub_2_f32_2uses_neg(float addrspace(1
|
||||
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
|
||||
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
|
||||
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
|
||||
|
||||
; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
|
||||
; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]
|
||||
@ -401,7 +404,7 @@ define amdgpu_kernel void @combine_to_mad_fsub_2_f32_2uses_mul(float addrspace(1
|
||||
|
||||
; SI-DENORM: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]]
|
||||
; SI-DENORM: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]]
|
||||
; SI-DENORM: v_sub_f32_e32 [[RESULT1:v[0-9]+]], [[TMP1]], [[C]]
|
||||
; SI-DENORM: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[C]]
|
||||
|
||||
; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
define amdgpu_kernel void @aggressive_combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CI %s
|
||||
|
||||
|
||||
; GCN-LABEL: {{^}}s_pack_v2i16:
|
||||
@ -160,7 +160,7 @@ define amdgpu_kernel void @v_pack_v2i16_imm_hi(i32 addrspace(1)* %in0) #0 {
|
||||
|
||||
; GCN-LABEL: {{^}}v_pack_v2i16_inline_imm_hi:
|
||||
; GFX9: global_load_dword [[VAL:v[0-9]+]]
|
||||
; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], 7, 16, [[VAL0]]
|
||||
; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], 7, 16, [[VAL]]
|
||||
; GFX9: ; use [[PACKED]]
|
||||
define amdgpu_kernel void @v_pack_v2i16_inline_imm_hi(i32 addrspace(1)* %in0) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
@ -1,8 +1,7 @@
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 -check-prefix=FUNC %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 -check-prefix=FUNC %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
target datalayout = "A5"
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 -check-prefix=FUNC %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 -check-prefix=FUNC %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
; FIXME: i16 promotion pass ruins the scalar cases when legal.
|
||||
; FIXME: r600 fails verifier
|
||||
@ -621,7 +620,7 @@ define amdgpu_kernel void @s_sext_in_reg_i2_i16_arg(i16 addrspace(1)* %out, i16
|
||||
|
||||
; SI: s_sext_i32_i8 [[SSEXT:s[0-9]+]], [[VAL]]
|
||||
; SI: v_mov_b32_e32 [[VSEXT:v[0-9]+]], [[SSEXT]]
|
||||
; SI: buffer_store_short [[VBFE]]
|
||||
; SI: buffer_store_short [[VSEXT]]
|
||||
|
||||
; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 8{{$}}
|
||||
; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
|
||||
; FIXME: Fails with -enable-var-scope
|
||||
|
||||
; Make sure 64-bit BFE pattern does a 32-bit BFE on the relevant half.
|
||||
@ -92,7 +92,7 @@ define amdgpu_kernel void @v_uextract_bit_32_i64(i64 addrspace(1)* %out, i64 add
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
|
||||
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 1{{$}}
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO1]]{{\]}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO1]]{{\]}}
|
||||
define amdgpu_kernel void @v_uextract_bit_33_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
|
||||
@ -211,7 +211,7 @@ define amdgpu_kernel void @v_uextract_bit_30_60_i64(i64 addrspace(1)* %out, i64
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
|
||||
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 30
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO1]]{{\]}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO1]]{{\]}}
|
||||
define amdgpu_kernel void @v_uextract_bit_33_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-simplify-libcall <%s | FileCheck -check-prefix=GCN -check-prefix=GCN-POSTLINK %s
|
||||
; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-simplify-libcall -amdgpu-prelink <%s | FileCheck -check-prefix=GCN -check-prefix=GCN-PRELINK %s
|
||||
; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-use-native -amdgpu-prelink <%s | FileCheck -check-prefix=GCN -check-prefix=GCN-NATIVE %s
|
||||
; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-simplify-libcall < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-POSTLINK %s
|
||||
; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-simplify-libcall -amdgpu-prelink <%s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-PRELINK %s
|
||||
; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-use-native -amdgpu-prelink < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NATIVE %s
|
||||
|
||||
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos
|
||||
; GCN-POSTLINK: tail call fast float @_Z3sinf(
|
||||
@ -697,8 +697,8 @@ declare float @_Z6sincosfPf(float, float*)
|
||||
%opencl.reserve_id_t = type opaque
|
||||
|
||||
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr)
|
||||
; GCN-PRELINK: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32* %{{.*}}) #[[NOUNWIND:[0-9]+]]
|
||||
; GCN-PRELINK: call i32 @__read_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t addrspace(5)* %{{.*}}, i32 2, i32* %{{.*}}) #[[NOUNWIND]]
|
||||
; GCN-PRELINK: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32* %{{.*}}) #[[$NOUNWIND:[0-9]+]]
|
||||
; GCN-PRELINK: call i32 @__read_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t addrspace(5)* %{{.*}}, i32 2, i32* %{{.*}}) #[[$NOUNWIND]]
|
||||
define amdgpu_kernel void @test_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) local_unnamed_addr {
|
||||
entry:
|
||||
%tmp = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)*
|
||||
@ -719,8 +719,8 @@ declare i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t ad
|
||||
declare void @__commit_read_pipe(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i32)
|
||||
|
||||
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr)
|
||||
; GCN-PRELINK: call i32 @__write_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32* %{{.*}}) #[[NOUNWIND]]
|
||||
; GCN-PRELINK: call i32 @__write_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t addrspace(5)* %{{.*}}, i32 2, i32* %{{.*}}) #[[NOUNWIND]]
|
||||
; GCN-PRELINK: call i32 @__write_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32* %{{.*}}) #[[$NOUNWIND]]
|
||||
; GCN-PRELINK: call i32 @__write_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t addrspace(5)* %{{.*}}, i32 2, i32* %{{.*}}) #[[$NOUNWIND]]
|
||||
define amdgpu_kernel void @test_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) local_unnamed_addr {
|
||||
entry:
|
||||
%tmp = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)*
|
||||
@ -743,15 +743,15 @@ declare void @__commit_write_pipe(%opencl.pipe_t addrspace(1)*, %opencl.reserve_
|
||||
%struct.S = type { [100 x i32] }
|
||||
|
||||
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pipe_size
|
||||
; GCN-PRELINK: call i32 @__read_pipe_2_1(%opencl.pipe_t addrspace(1)* %{{.*}} i8* %{{.*}}) #[[NOUNWIND]]
|
||||
; GCN-PRELINK: call i32 @__read_pipe_2_2(%opencl.pipe_t addrspace(1)* %{{.*}} i16* %{{.*}}) #[[NOUNWIND]]
|
||||
; GCN-PRELINK: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}} i32* %{{.*}}) #[[NOUNWIND]]
|
||||
; GCN-PRELINK: call i32 @__read_pipe_2_8(%opencl.pipe_t addrspace(1)* %{{.*}} i64* %{{.*}}) #[[NOUNWIND]]
|
||||
; GCN-PRELINK: call i32 @__read_pipe_2_16(%opencl.pipe_t addrspace(1)* %{{.*}}, <2 x i64>* %{{.*}}) #[[NOUNWIND]]
|
||||
; GCN-PRELINK: call i32 @__read_pipe_2_32(%opencl.pipe_t addrspace(1)* %{{.*}}, <4 x i64>* %{{.*}} #[[NOUNWIND]]
|
||||
; GCN-PRELINK: call i32 @__read_pipe_2_64(%opencl.pipe_t addrspace(1)* %{{.*}}, <8 x i64>* %{{.*}} #[[NOUNWIND]]
|
||||
; GCN-PRELINK: call i32 @__read_pipe_2_128(%opencl.pipe_t addrspace(1)* %{{.*}}, <16 x i64>* %{{.*}} #[[NOUNWIND]]
|
||||
; GCN-PRELINK: call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %{{.*}}, i8* %{{.*}} i32 400, i32 4) #[[NOUNWIND]]
|
||||
; GCN-PRELINK: call i32 @__read_pipe_2_1(%opencl.pipe_t addrspace(1)* %{{.*}} i8* %{{.*}}) #[[$NOUNWIND]]
|
||||
; GCN-PRELINK: call i32 @__read_pipe_2_2(%opencl.pipe_t addrspace(1)* %{{.*}} i16* %{{.*}}) #[[$NOUNWIND]]
|
||||
; GCN-PRELINK: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}} i32* %{{.*}}) #[[$NOUNWIND]]
|
||||
; GCN-PRELINK: call i32 @__read_pipe_2_8(%opencl.pipe_t addrspace(1)* %{{.*}} i64* %{{.*}}) #[[$NOUNWIND]]
|
||||
; GCN-PRELINK: call i32 @__read_pipe_2_16(%opencl.pipe_t addrspace(1)* %{{.*}}, <2 x i64>* %{{.*}}) #[[$NOUNWIND]]
|
||||
; GCN-PRELINK: call i32 @__read_pipe_2_32(%opencl.pipe_t addrspace(1)* %{{.*}}, <4 x i64>* %{{.*}} #[[$NOUNWIND]]
|
||||
; GCN-PRELINK: call i32 @__read_pipe_2_64(%opencl.pipe_t addrspace(1)* %{{.*}}, <8 x i64>* %{{.*}} #[[$NOUNWIND]]
|
||||
; GCN-PRELINK: call i32 @__read_pipe_2_128(%opencl.pipe_t addrspace(1)* %{{.*}}, <16 x i64>* %{{.*}} #[[$NOUNWIND]]
|
||||
; GCN-PRELINK: call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %{{.*}}, i8* %{{.*}} i32 400, i32 4) #[[$NOUNWIND]]
|
||||
define amdgpu_kernel void @test_pipe_size(%opencl.pipe_t addrspace(1)* %p1, i8 addrspace(1)* %ptr1, %opencl.pipe_t addrspace(1)* %p2, i16 addrspace(1)* %ptr2, %opencl.pipe_t addrspace(1)* %p4, i32 addrspace(1)* %ptr4, %opencl.pipe_t addrspace(1)* %p8, i64 addrspace(1)* %ptr8, %opencl.pipe_t addrspace(1)* %p16, <2 x i64> addrspace(1)* %ptr16, %opencl.pipe_t addrspace(1)* %p32, <4 x i64> addrspace(1)* %ptr32, %opencl.pipe_t addrspace(1)* %p64, <8 x i64> addrspace(1)* %ptr64, %opencl.pipe_t addrspace(1)* %p128, <16 x i64> addrspace(1)* %ptr128, %opencl.pipe_t addrspace(1)* %pu, %struct.S addrspace(1)* %ptru) local_unnamed_addr #0 {
|
||||
entry:
|
||||
%tmp = addrspacecast i8 addrspace(1)* %ptr1 to i8*
|
||||
@ -783,5 +783,5 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CGN-PRELINK: attributes #[[NOUNWIND]] = { nounwind }
|
||||
; CGN-PRELINK: attributes #[[$NOUNWIND]] = { nounwind }
|
||||
attributes #0 = { nounwind }
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
|
||||
@ -94,7 +94,7 @@ define amdgpu_kernel void @sextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %o
|
||||
|
||||
; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_ne_0:
|
||||
; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
|
||||
; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
|
||||
; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
|
||||
; SI: buffer_store_byte [[RESULT]]
|
||||
define amdgpu_kernel void @zextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
|
||||
%load = load i1, i1 addrspace(1)* %in
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -amdgpu-early-ifcvt=0 -machine-sink-split-probability-threshold=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-early-ifcvt=0 -machine-sink-split-probability-threshold=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -amdgpu-early-ifcvt=0 -machine-sink-split-probability-threshold=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-early-ifcvt=0 -machine-sink-split-probability-threshold=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
; GCN-LABEL: {{^}}uniform_if_scc:
|
||||
; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 0
|
||||
@ -502,7 +502,7 @@ done:
|
||||
; GCN: s_mov_b32 [[S_VAL]], 1
|
||||
|
||||
; GCN: [[IF_LABEL]]:
|
||||
; GCN: v_mov_b32_e32 [[V_VAL]], [[S_VAL]]
|
||||
; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
|
||||
; GCN: buffer_store_dword [[V_VAL]]
|
||||
define amdgpu_kernel void @uniform_if_scc_i64_sgt(i64 %cond, i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
||||
@ -60,8 +60,8 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprZ_f32(float addrspace(1)*
|
||||
; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k1_sgprX_f32:
|
||||
; GCN: s_load_dword [[X:s[0-9]+]]
|
||||
; GCN-DAG: v_cmp_nlg_f32_e64 vcc, [[X]], 0
|
||||
; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
|
||||
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[VZ]], vcc
|
||||
; GCN-DAG: v_mov_b32_e32 [[VX:v[0-9]+]], [[X]]
|
||||
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[VX]], vcc
|
||||
define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprX_f32(float addrspace(1)* %out, float %x) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
@ -73,7 +73,8 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprX_f32(float addrspace(1)*
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k0_sgprZ_f32:
|
||||
; GCN: s_load_dword [[X:s[0-9]+]]
|
||||
; GCN-DAG: s_load_dword [[X:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
|
||||
; GCN-DAG: s_load_dword [[Z:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
|
||||
; GCN-DAG: v_cmp_nlg_f32_e64 vcc, [[X]], 0
|
||||
; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
|
||||
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, [[VZ]], vcc
|
||||
@ -90,8 +91,8 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprZ_f32(float addrspace(1)*
|
||||
; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k0_sgprX_f32:
|
||||
; GCN: s_load_dword [[X:s[0-9]+]]
|
||||
; GCN-DAG: v_cmp_nlg_f32_e64 vcc, [[X]], 0
|
||||
; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
|
||||
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, [[VZ]], vcc
|
||||
; GCN-DAG: v_mov_b32_e32 [[VX:v[0-9]+]], [[X]]
|
||||
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, [[VX]], vcc
|
||||
define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprX_f32(float addrspace(1)* %out, float %x) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
|
Loading…
Reference in New Issue
Block a user