1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[AMDGPU] Switch scalarize global loads ON by default

Differential revision: https://reviews.llvm.org/D34407

llvm-svn: 307097
This commit is contained in:
Alexander Timofeev 2017-07-04 17:32:00 +00:00
parent 289d0ebad4
commit 71cf453d98
142 changed files with 788 additions and 558 deletions

View File

@ -85,7 +85,7 @@ static cl::opt<bool> EnableLoadStoreVectorizer(
static cl::opt<bool> ScalarizeGlobal(
"amdgpu-scalarize-global-loads",
cl::desc("Enable global load scalarization"),
cl::init(false),
cl::init(true),
cl::Hidden);
// Option to run internalize pass.

View File

@ -5,9 +5,9 @@
;FUNC-LABEL: {{^}}test1:
;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;SI: v_add_i32_e32 [[REG:v[0-9]+]], vcc, {{v[0-9]+, v[0-9]+}}
;SI-NOT: [[REG]]
;SI: buffer_store_dword [[REG]],
;SI: s_add_i32 s[[REG:[0-9]+]], {{s[0-9]+, s[0-9]+}}
;SI: v_mov_b32_e32 v[[REG]], s[[REG]]
;SI: buffer_store_dword v[[REG]],
define amdgpu_kernel void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
%a = load i32, i32 addrspace(1)* %in
@ -21,8 +21,8 @@ define amdgpu_kernel void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
;SI: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
;SI: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
define amdgpu_kernel void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
@ -39,10 +39,10 @@ define amdgpu_kernel void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspa
;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
;SI: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
;SI: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
;SI: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
;SI: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
define amdgpu_kernel void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1

View File

@ -19,10 +19,10 @@ define amdgpu_kernel void @test_i128_vreg(i128 addrspace(1)* noalias %out, i128
; Check that the SGPR add operand is correctly moved to a VGPR.
; GCN-LABEL: {{^}}sgpr_operand:
; GCN: v_add_i32
; GCN: v_addc_u32
; GCN: v_addc_u32
; GCN: v_addc_u32
; GCN: s_add_u32
; GCN: s_addc_u32
; GCN: s_addc_u32
; GCN: s_addc_u32
define amdgpu_kernel void @sgpr_operand(i128 addrspace(1)* noalias %out, i128 addrspace(1)* noalias %in, i128 %a) {
%foo = load i128, i128 addrspace(1)* %in, align 8
%result = add i128 %foo, %a
@ -31,10 +31,10 @@ define amdgpu_kernel void @sgpr_operand(i128 addrspace(1)* noalias %out, i128 ad
}
; GCN-LABEL: {{^}}sgpr_operand_reversed:
; GCN: v_add_i32
; GCN: v_addc_u32
; GCN: v_addc_u32
; GCN: v_addc_u32
; GCN: s_add_u32
; GCN: s_addc_u32
; GCN: s_addc_u32
; GCN: s_addc_u32
define amdgpu_kernel void @sgpr_operand_reversed(i128 addrspace(1)* noalias %out, i128 addrspace(1)* noalias %in, i128 %a) {
%foo = load i128, i128 addrspace(1)* %in, align 8
%result = add i128 %a, %foo

View File

@ -19,8 +19,8 @@ define amdgpu_kernel void @test_i64_vreg(i64 addrspace(1)* noalias %out, i64 add
; Check that the SGPR add operand is correctly moved to a VGPR.
; SI-LABEL: {{^}}sgpr_operand:
; SI: v_add_i32
; SI: v_addc_u32
; SI: s_add_u32
; SI: s_addc_u32
define amdgpu_kernel void @sgpr_operand(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 addrspace(1)* noalias %in_bar, i64 %a) {
%foo = load i64, i64 addrspace(1)* %in, align 8
%result = add i64 %foo, %a
@ -32,8 +32,8 @@ define amdgpu_kernel void @sgpr_operand(i64 addrspace(1)* noalias %out, i64 addr
; SGPR as other operand.
;
; SI-LABEL: {{^}}sgpr_operand_reversed:
; SI: v_add_i32
; SI: v_addc_u32
; SI: s_add_u32
; SI: s_addc_u32
define amdgpu_kernel void @sgpr_operand_reversed(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %a) {
%foo = load i64, i64 addrspace(1)* %in, align 8
%result = add i64 %a, %foo

View File

@ -1,4 +1,4 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; GCN-LABEL: {{^}}alignbit_shr_pat:
; GCN-DAG: s_load_dword s[[SHR:[0-9]+]]

View File

@ -2,8 +2,7 @@
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}v_and_i64_br:
; SI: v_and_b32
; SI: v_and_b32
; SI: s_and_b64
define amdgpu_kernel void @v_and_i64_br(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
entry:
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0

View File

@ -8,8 +8,8 @@ declare i32 @llvm.r600.read.tidig.x() #0
; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: s_and_b32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; SI: s_and_b32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
define amdgpu_kernel void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
@ -26,10 +26,11 @@ define amdgpu_kernel void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspa
; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: s_and_b32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; SI: s_and_b32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; SI: s_and_b32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; SI: s_and_b32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
define amdgpu_kernel void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
@ -136,7 +137,9 @@ define amdgpu_kernel void @v_and_i32_vgpr_sgpr(i32 addrspace(1)* %out, i32 addrs
; FUNC-LABEL: {{^}}v_and_constant_i32
; SI: v_and_b32_e32 v{{[0-9]+}}, 0x12d687, v{{[0-9]+}}
define amdgpu_kernel void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
%a = load i32, i32 addrspace(1)* %aptr, align 4
%tid = call i32 @llvm.r600.read.tidig.x() #0
%gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
%a = load i32, i32 addrspace(1)* %gep, align 4
%and = and i32 %a, 1234567
store i32 %and, i32 addrspace(1)* %out, align 4
ret void
@ -145,7 +148,9 @@ define amdgpu_kernel void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrsp
; FUNC-LABEL: {{^}}v_and_inline_imm_64_i32
; SI: v_and_b32_e32 v{{[0-9]+}}, 64, v{{[0-9]+}}
define amdgpu_kernel void @v_and_inline_imm_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
%a = load i32, i32 addrspace(1)* %aptr, align 4
%tid = call i32 @llvm.r600.read.tidig.x() #0
%gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
%a = load i32, i32 addrspace(1)* %gep, align 4
%and = and i32 %a, 64
store i32 %and, i32 addrspace(1)* %out, align 4
ret void
@ -154,7 +159,9 @@ define amdgpu_kernel void @v_and_inline_imm_64_i32(i32 addrspace(1)* %out, i32 a
; FUNC-LABEL: {{^}}v_and_inline_imm_neg_16_i32
; SI: v_and_b32_e32 v{{[0-9]+}}, -16, v{{[0-9]+}}
define amdgpu_kernel void @v_and_inline_imm_neg_16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
%a = load i32, i32 addrspace(1)* %aptr, align 4
%tid = call i32 @llvm.r600.read.tidig.x() #0
%gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
%a = load i32, i32 addrspace(1)* %gep, align 4
%and = and i32 %a, -16
store i32 %and, i32 addrspace(1)* %out, align 4
ret void
@ -239,8 +246,11 @@ define amdgpu_kernel void @s_and_multi_use_inline_imm_i64(i64 addrspace(1)* %out
; SI: v_and_b32
; SI: v_and_b32
define amdgpu_kernel void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
%a = load i64, i64 addrspace(1)* %aptr, align 8
%b = load i64, i64 addrspace(1)* %bptr, align 8
%tid = call i32 @llvm.r600.read.tidig.x() #0
%gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%a = load i64, i64 addrspace(1)* %gep.a, align 8
%gep.b = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
%b = load i64, i64 addrspace(1)* %gep.b, align 8
%and = and i64 %a, %b
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
@ -251,7 +261,9 @@ define amdgpu_kernel void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %
; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, 0x11e, {{v[0-9]+}}
; SI: buffer_store_dwordx2
define amdgpu_kernel void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
%a = load i64, i64 addrspace(1)* %aptr, align 8
%tid = call i32 @llvm.r600.read.tidig.x() #0
%gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%a = load i64, i64 addrspace(1)* %gep.a, align 8
%and = and i64 %a, 1231231234567
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
@ -299,26 +311,30 @@ define amdgpu_kernel void @v_and_multi_use_inline_imm_i64(i64 addrspace(1)* %out
}
; FUNC-LABEL: {{^}}v_and_i64_32_bit_constant:
; SI: buffer_load_dword [[VAL:v[0-9]+]]
; SI: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]]
; SI-NOT: and
; SI: v_and_b32_e32 {{v[0-9]+}}, 0x12d687, [[VAL]]
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @v_and_i64_32_bit_constant(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
%a = load i64, i64 addrspace(1)* %aptr, align 8
%tid = call i32 @llvm.r600.read.tidig.x() #0
%gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%a = load i64, i64 addrspace(1)* %gep.a, align 8
%and = and i64 %a, 1234567
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
}
; FUNC-LABEL: {{^}}v_and_inline_imm_i64:
; SI: buffer_load_dword v{{[0-9]+}}
; SI: {{buffer|flat}}_load_dword v{{[0-9]+}}
; SI-NOT: and
; SI: v_and_b32_e32 {{v[0-9]+}}, 64, {{v[0-9]+}}
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
%a = load i64, i64 addrspace(1)* %aptr, align 8
%tid = call i32 @llvm.r600.read.tidig.x() #0
%gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%a = load i64, i64 addrspace(1)* %gep.a, align 8
%and = and i64 %a, 64
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
@ -326,13 +342,15 @@ define amdgpu_kernel void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addr
; FIXME: Should be able to reduce load width
; FUNC-LABEL: {{^}}v_and_inline_neg_imm_i64:
; SI: buffer_load_dwordx2 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
; SI: {{buffer|flat}}_load_dwordx2 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
; SI-NOT: and
; SI: v_and_b32_e32 v[[VAL_LO]], -8, v[[VAL_LO]]
; SI-NOT: and
; SI: buffer_store_dwordx2 v{{\[}}[[VAL_LO]]:[[VAL_HI]]{{\]}}
define amdgpu_kernel void @v_and_inline_neg_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
%a = load i64, i64 addrspace(1)* %aptr, align 8
%tid = call i32 @llvm.r600.read.tidig.x() #0
%gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%a = load i64, i64 addrspace(1)* %gep.a, align 8
%and = and i64 %a, -8
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
@ -549,5 +567,4 @@ define amdgpu_kernel void @s_and_inline_high_imm_f32_neg_4.0_i64(i64 addrspace(1
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
}
attributes #0 = { nounwind readnone }

View File

@ -2,9 +2,9 @@
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; GCN-LABEL: {{^}}any_extend_vector_inreg_v16i8_to_v4i32:
; GCN: {{buffer|flat}}_load_dwordx4
; GCN-DAG: {{buffer|flat}}_load_dwordx4
; GCN-DAG: {{buffer|flat}}_load_dword
; GCN: s_load_dwordx4
; GCN-DAG: s_load_dwordx4
; GCN-DAG: s_load_dword
; GCN: {{buffer|flat}}_store_byte
; GCN: {{buffer|flat}}_store_byte

View File

@ -2,6 +2,8 @@
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare i16 @llvm.bitreverse.i16(i16) #1
declare i32 @llvm.bitreverse.i32(i32) #1
declare i64 @llvm.bitreverse.i64(i64) #1
@ -42,12 +44,14 @@ define amdgpu_kernel void @s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val)
}
; FUNC-LABEL: {{^}}v_brev_i32:
; SI: buffer_load_dword [[VAL:v[0-9]+]],
; SI: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]],
; SI: v_bfrev_b32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
define amdgpu_kernel void @v_brev_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) #0 {
%val = load i32, i32 addrspace(1)* %valptr
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %gep
%brev = call i32 @llvm.bitreverse.i32(i32 %val) #1
store i32 %brev, i32 addrspace(1)* %out
ret void
@ -66,7 +70,9 @@ define amdgpu_kernel void @s_brev_v2i32(<2 x i32> addrspace(1)* noalias %out, <2
; SI: v_bfrev_b32_e32
; SI: v_bfrev_b32_e32
define amdgpu_kernel void @v_brev_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) #0 {
%val = load <2 x i32>, <2 x i32> addrspace(1)* %valptr
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %valptr, i32 %tid
%val = load <2 x i32>, <2 x i32> addrspace(1)* %gep
%brev = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %val) #1
store <2 x i32> %brev, <2 x i32> addrspace(1)* %out
ret void
@ -82,7 +88,9 @@ define amdgpu_kernel void @s_brev_i64(i64 addrspace(1)* noalias %out, i64 %val)
; FUNC-LABEL: {{^}}v_brev_i64:
; SI-NOT: v_or_b32_e64 v{{[0-9]+}}, 0, 0
define amdgpu_kernel void @v_brev_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %valptr) #0 {
%val = load i64, i64 addrspace(1)* %valptr
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr i64, i64 addrspace(1)* %valptr, i32 %tid
%val = load i64, i64 addrspace(1)* %gep
%brev = call i64 @llvm.bitreverse.i64(i64 %val) #1
store i64 %brev, i64 addrspace(1)* %out
ret void
@ -97,7 +105,9 @@ define amdgpu_kernel void @s_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2
; FUNC-LABEL: {{^}}v_brev_v2i64:
define amdgpu_kernel void @v_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %valptr) #0 {
%val = load <2 x i64>, <2 x i64> addrspace(1)* %valptr
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr <2 x i64> , <2 x i64> addrspace(1)* %valptr, i32 %tid
%val = load <2 x i64>, <2 x i64> addrspace(1)* %gep
%brev = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %val) #1
store <2 x i64> %brev, <2 x i64> addrspace(1)* %out
ret void

View File

@ -10,7 +10,7 @@ declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) nounwind readnone
declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>) nounwind readnone
; FUNC-LABEL: @test_bswap_i32
; SI: buffer_load_dword [[VAL:v[0-9]+]]
; SI: s_load_dword [[VAL:s[0-9]+]]
; SI-DAG: v_alignbit_b32 [[TMP0:v[0-9]+]], [[VAL]], [[VAL]], 8
; SI-DAG: v_alignbit_b32 [[TMP1:v[0-9]+]], [[VAL]], [[VAL]], 24
; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0xff00ff

View File

@ -1,9 +1,9 @@
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI -check-prefix=OPT-CIVI %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI -check-prefix=OPT-CIVI %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-GFX9 %s
; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
; RUN: llc -march=amdgcn -amdgpu-scalarize-global-loads=false -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s
; RUN: llc -march=amdgcn -amdgpu-scalarize-global-loads=false -mcpu=tonga -mattr=-flat-for-global -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s
; RUN: llc -march=amdgcn -amdgpu-scalarize-global-loads=false -mcpu=gfx900 -mattr=-flat-for-global -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
; OPT-LABEL: @test_no_sink_flat_small_offset_i32(
; OPT-CIVI: getelementptr i32, i32 addrspace(4)* %in
@ -40,7 +40,7 @@ done:
; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
; OPT: getelementptr i32, i32 addrspace(4)* %out,
; OPT-CI-NOT: getelementptr
; rOPT-CI-NOT: getelementptr
; OPT: br i1
; OPT-CI: addrspacecast

View File

@ -1,9 +1,9 @@
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tahiti < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-SI %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s
; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -march=amdgcn -mcpu=tahiti -amdgpu-scalarize-global-loads=false -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -amdgpu-scalarize-global-loads=false -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
; RUN: llc -march=amdgcn -mcpu=tonga -amdgpu-scalarize-global-loads=false -mattr=-flat-for-global -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"

View File

@ -12,7 +12,7 @@ declare float @llvm.fma.f32(float, float, float)
; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0
; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0
; It's probably OK if this is slightly higher:
; CHECK: ; NumVgprs: 8
; CHECK: ; NumVgprs: 4
define amdgpu_kernel void @foobar(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in, i32 %flag) {
entry:
%cmpflag = icmp eq i32 %flag, 1

View File

@ -5,35 +5,41 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
; FUNC-LABEL: {{^}}test_copy_v4i8:
; GCN: buffer_load_dword [[REG:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[REG:v[0-9]+]]
; GCN: buffer_store_dword [[REG]]
; GCN: s_endpgm
define amdgpu_kernel void @test_copy_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
%tid.x = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid.x
%val = load <4 x i8>, <4 x i8> addrspace(1)* %gep, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4
ret void
}
; FUNC-LABEL: {{^}}test_copy_v4i8_x2:
; GCN: buffer_load_dword [[REG:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[REG:v[0-9]+]]
; GCN: buffer_store_dword [[REG]]
; GCN: buffer_store_dword [[REG]]
; GCN: s_endpgm
define amdgpu_kernel void @test_copy_v4i8_x2(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
%tid.x = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid.x
%val = load <4 x i8>, <4 x i8> addrspace(1)* %gep, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out1, align 4
ret void
}
; FUNC-LABEL: {{^}}test_copy_v4i8_x3:
; GCN: buffer_load_dword [[REG:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[REG:v[0-9]+]]
; GCN: buffer_store_dword [[REG]]
; GCN: buffer_store_dword [[REG]]
; GCN: buffer_store_dword [[REG]]
; GCN: s_endpgm
define amdgpu_kernel void @test_copy_v4i8_x3(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
%tid.x = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid.x
%val = load <4 x i8>, <4 x i8> addrspace(1)* %gep, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out1, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out2, align 4
@ -41,14 +47,16 @@ define amdgpu_kernel void @test_copy_v4i8_x3(<4 x i8> addrspace(1)* %out0, <4 x
}
; FUNC-LABEL: {{^}}test_copy_v4i8_x4:
; GCN: buffer_load_dword [[REG:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[REG:v[0-9]+]]
; GCN: buffer_store_dword [[REG]]
; GCN: buffer_store_dword [[REG]]
; GCN: buffer_store_dword [[REG]]
; GCN: buffer_store_dword [[REG]]
; GCN: s_endpgm
define amdgpu_kernel void @test_copy_v4i8_x4(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %out3, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
%tid.x = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid.x
%val = load <4 x i8>, <4 x i8> addrspace(1)* %gep, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out1, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out2, align 4
@ -57,7 +65,7 @@ define amdgpu_kernel void @test_copy_v4i8_x4(<4 x i8> addrspace(1)* %out0, <4 x
}
; FUNC-LABEL: {{^}}test_copy_v4i8_extra_use:
; GCN: buffer_load_dword
; GCN: {{buffer|flat}}_load_dword
; GCN-DAG: v_lshrrev_b32
; GCN: v_and_b32
; GCN: v_or_b32
@ -66,7 +74,9 @@ define amdgpu_kernel void @test_copy_v4i8_x4(<4 x i8> addrspace(1)* %out0, <4 x
; GCN: s_endpgm
define amdgpu_kernel void @test_copy_v4i8_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
%tid.x = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid.x
%val = load <4 x i8>, <4 x i8> addrspace(1)* %gep, align 4
%add = add <4 x i8> %val, <i8 9, i8 9, i8 9, i8 9>
store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
store <4 x i8> %add, <4 x i8> addrspace(1)* %out1, align 4
@ -97,19 +107,21 @@ define amdgpu_kernel void @test_copy_v4i8_x2_extra_use(<4 x i8> addrspace(1)* %o
}
; FUNC-LABEL: {{^}}test_copy_v3i8_align4:
; GCN: buffer_load_dword
; GCN: {{buffer|flat}}_load_dword
; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @test_copy_v3i8_align4(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) nounwind {
%val = load <3 x i8>, <3 x i8> addrspace(1)* %in, align 4
%tid.x = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %in, i32 %tid.x
%val = load <3 x i8>, <3 x i8> addrspace(1)* %gep, align 4
store <3 x i8> %val, <3 x i8> addrspace(1)* %out, align 4
ret void
}
; FUNC-LABEL: {{^}}test_copy_v3i8_align2:
; GCN-DAG: buffer_load_ushort v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_load_ubyte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
; GCN-DAG: {{buffer|flat}}_load_ushort v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: {{buffer|flat}}_load_ubyte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
; GCN: s_endpgm
@ -120,9 +132,9 @@ define amdgpu_kernel void @test_copy_v3i8_align2(<3 x i8> addrspace(1)* %out, <3
}
; FUNC-LABEL: {{^}}test_copy_v3i8_align1:
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: {{buffer|flat}}_load_ubyte
; GCN: {{buffer|flat}}_load_ubyte
; GCN: {{buffer|flat}}_load_ubyte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
@ -135,10 +147,10 @@ define amdgpu_kernel void @test_copy_v3i8_align1(<3 x i8> addrspace(1)* %out, <3
}
; FUNC-LABEL: {{^}}test_copy_v4i8_volatile_load:
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: {{buffer|flat}}_load_ubyte
; GCN: {{buffer|flat}}_load_ubyte
; GCN: {{buffer|flat}}_load_ubyte
; GCN: {{buffer|flat}}_load_ubyte
; GCN: buffer_store_dword
; GCN: s_endpgm
define amdgpu_kernel void @test_copy_v4i8_volatile_load(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
@ -148,10 +160,10 @@ define amdgpu_kernel void @test_copy_v4i8_volatile_load(<4 x i8> addrspace(1)* %
}
; FUNC-LABEL: {{^}}test_copy_v4i8_volatile_store:
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: {{buffer|flat}}_load_ubyte
; GCN: {{buffer|flat}}_load_ubyte
; GCN: {{buffer|flat}}_load_ubyte
; GCN: {{buffer|flat}}_load_ubyte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte

View File

@ -34,7 +34,7 @@ define amdgpu_kernel void @s_ctlz_i32(i32 addrspace(1)* noalias %out, i32 %val)
}
; FUNC-LABEL: {{^}}v_ctlz_i32:
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
; GCN: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]],
; GCN-DAG: v_ffbh_u32_e32 [[CTLZ:v[0-9]+]], [[VAL]]
; GCN-DAG: v_cmp_ne_u32_e32 vcc, 0, [[CTLZ]]
; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], 32, [[CTLZ]], vcc
@ -44,14 +44,16 @@ define amdgpu_kernel void @s_ctlz_i32(i32 addrspace(1)* noalias %out, i32 %val)
; EG: FFBH_UINT
; EG: CNDE_INT
define amdgpu_kernel void @v_ctlz_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32, i32 addrspace(1)* %valptr, align 4
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep, align 4
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
store i32 %ctlz, i32 addrspace(1)* %out, align 4
ret void
}
; FUNC-LABEL: {{^}}v_ctlz_v2i32:
; GCN: buffer_load_dwordx2
; GCN: {{buffer|flat}}_load_dwordx2
; GCN: v_ffbh_u32_e32
; GCN: v_ffbh_u32_e32
; GCN: buffer_store_dwordx2
@ -62,14 +64,16 @@ define amdgpu_kernel void @v_ctlz_i32(i32 addrspace(1)* noalias %out, i32 addrsp
; EG: FFBH_UINT
; EG: CNDE_INT
define amdgpu_kernel void @v_ctlz_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
%val = load <2 x i32>, <2 x i32> addrspace(1)* %valptr, align 8
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %valptr, i32 %tid
%val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %val, i1 false) nounwind readnone
store <2 x i32> %ctlz, <2 x i32> addrspace(1)* %out, align 8
ret void
}
; FUNC-LABEL: {{^}}v_ctlz_v4i32:
; GCN: buffer_load_dwordx4
; GCN: {{buffer|flat}}_load_dwordx4
; GCN: v_ffbh_u32_e32
; GCN: v_ffbh_u32_e32
; GCN: v_ffbh_u32_e32
@ -90,14 +94,16 @@ define amdgpu_kernel void @v_ctlz_v2i32(<2 x i32> addrspace(1)* noalias %out, <2
; EG-DAG: FFBH_UINT
; EG-DAG: CNDE_INT
define amdgpu_kernel void @v_ctlz_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
%val = load <4 x i32>, <4 x i32> addrspace(1)* %valptr, align 16
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %valptr, i32 %tid
%val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16
%ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val, i1 false) nounwind readnone
store <4 x i32> %ctlz, <4 x i32> addrspace(1)* %out, align 16
ret void
}
; FUNC-LABEL: {{^}}v_ctlz_i8:
; GCN: buffer_load_ubyte [[VAL:v[0-9]+]],
; GCN: {{buffer|flat}}_load_ubyte [[VAL:v[0-9]+]],
; SI-DAG: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; VI-DAG: v_ffbh_u32_sdwa [[RESULT:v[0-9]+]], [[VAL]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
; GCN: buffer_store_byte [[RESULT]],
@ -168,12 +174,14 @@ define amdgpu_kernel void @v_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64
}
; FUNC-LABEL: {{^}}v_ctlz_i32_sel_eq_neg1:
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
; GCN: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]],
; GCN: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @v_ctlz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32, i32 addrspace(1)* %valptr
define amdgpu_kernel void @v_ctlz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
%cmp = icmp eq i32 %val, 0
%sel = select i1 %cmp, i32 -1, i32 %ctlz
@ -182,12 +190,14 @@ define amdgpu_kernel void @v_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64
}
; FUNC-LABEL: {{^}}v_ctlz_i32_sel_ne_neg1:
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
; GCN: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]],
; GCN: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @v_ctlz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32, i32 addrspace(1)* %valptr
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
%cmp = icmp ne i32 %val, 0
%sel = select i1 %cmp, i32 %ctlz, i32 -1
@ -197,13 +207,15 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out
; TODO: Should be able to eliminate select here as well.
; FUNC-LABEL: {{^}}v_ctlz_i32_sel_eq_bitwidth:
; GCN: buffer_load_dword
; GCN: {{buffer|flat}}_load_dword
; GCN: v_ffbh_u32_e32
; GCN: v_cmp
; GCN: v_cndmask
; GCN: s_endpgm
define amdgpu_kernel void @v_ctlz_i32_sel_eq_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32, i32 addrspace(1)* %valptr
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
%cmp = icmp eq i32 %ctlz, 32
%sel = select i1 %cmp, i32 -1, i32 %ctlz
@ -212,13 +224,15 @@ define amdgpu_kernel void @v_ctlz_i32_sel_eq_bitwidth(i32 addrspace(1)* noalias
}
; FUNC-LABEL: {{^}}v_ctlz_i32_sel_ne_bitwidth:
; GCN: buffer_load_dword
; GCN: {{buffer|flat}}_load_dword
; GCN: v_ffbh_u32_e32
; GCN: v_cmp
; GCN: v_cndmask
; GCN: s_endpgm
define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32, i32 addrspace(1)* %valptr
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
%cmp = icmp ne i32 %ctlz, 32
%sel = select i1 %cmp, i32 %ctlz, i32 -1
@ -242,7 +256,7 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
}
; FUNC-LABEL: {{^}}v_ctlz_i16_sel_eq_neg1:
; SI: buffer_load_ushort [[VAL:v[0-9]+]],
; SI: {{buffer|flat}}_load_ushort [[VAL:v[0-9]+]],
; SI: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
; SI: buffer_store_short [[FFBH]],
define amdgpu_kernel void @v_ctlz_i16_sel_eq_neg1(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %valptr) nounwind {

View File

@ -29,21 +29,23 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out,
}
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32:
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
; GCN: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]],
; GCN: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
define amdgpu_kernel void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32, i32 addrspace(1)* %valptr, align 4
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep, align 4
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
store i32 %ctlz, i32 addrspace(1)* %out, align 4
ret void
}
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_v2i32:
; GCN: buffer_load_dwordx2
; GCN: {{buffer|flat}}_load_dwordx2
; GCN: v_ffbh_u32_e32
; GCN: v_ffbh_u32_e32
; GCN: buffer_store_dwordx2
@ -52,14 +54,16 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out,
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
define amdgpu_kernel void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
%val = load <2 x i32>, <2 x i32> addrspace(1)* %valptr, align 8
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %valptr, i32 %tid
%val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %val, i1 true) nounwind readnone
store <2 x i32> %ctlz, <2 x i32> addrspace(1)* %out, align 8
ret void
}
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_v4i32:
; GCN: buffer_load_dwordx4
; GCN: {{buffer|flat}}_load_dwordx4
; GCN: v_ffbh_u32_e32
; GCN: v_ffbh_u32_e32
; GCN: v_ffbh_u32_e32
@ -72,18 +76,22 @@ define amdgpu_kernel void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noali
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
define amdgpu_kernel void @v_ctlz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
%val = load <4 x i32>, <4 x i32> addrspace(1)* %valptr, align 16
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %valptr, i32 %tid
%val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16
%ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val, i1 true) nounwind readnone
store <4 x i32> %ctlz, <4 x i32> addrspace(1)* %out, align 16
ret void
}
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i8:
; GCN: buffer_load_ubyte [[VAL:v[0-9]+]],
; GCN: {{buffer|flat}}_load_ubyte [[VAL:v[0-9]+]],
; GCN: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; GCN: buffer_store_byte [[RESULT]],
define amdgpu_kernel void @v_ctlz_zero_undef_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
%val = load i8, i8 addrspace(1)* %valptr
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i8, i8 addrspace(1)* %valptr, i32 %tid
%val = load i8, i8 addrspace(1)* %in.gep
%ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 true) nounwind readnone
store i8 %ctlz, i8 addrspace(1)* %out
ret void
@ -145,11 +153,13 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias
}
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_eq_neg1:
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
; GCN: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]],
; GCN: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[RESULT]],
define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32, i32 addrspace(1)* %valptr
define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
%cmp = icmp eq i32 %val, 0
%sel = select i1 %cmp, i32 -1, i32 %ctlz
@ -158,11 +168,13 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias
}
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_ne_neg1:
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
; GCN: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]],
; GCN: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[RESULT]],
define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32, i32 addrspace(1)* %valptr
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
%cmp = icmp ne i32 %val, 0
%sel = select i1 %cmp, i32 %ctlz, i32 -1
@ -186,15 +198,17 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8_sel_eq_neg1(i8 addrspace(1)* noa
}
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_eq_neg1_two_use:
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
; GCN: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]],
; GCN-DAG: v_ffbh_u32_e32 [[RESULT0:v[0-9]+]], [[VAL]]
; GCN-DAG: v_cmp_eq_u32_e32 vcc, 0, [[VAL]]
; GCN-DAG: v_cndmask_b32_e64 [[RESULT1:v[0-9]+]], 0, 1, vcc
; GCN-DAG: buffer_store_dword [[RESULT0]]
; GCN-DAG: buffer_store_byte [[RESULT1]]
; GCN: s_endpgm
define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1_two_use(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32, i32 addrspace(1)* %valptr
define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1_two_use(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
%cmp = icmp eq i32 %val, 0
%sel = select i1 %cmp, i32 -1, i32 %ctlz
@ -205,13 +219,15 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8_sel_eq_neg1(i8 addrspace(1)* noa
; Selected on wrong constant
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_eq_0:
; GCN: buffer_load_dword
; GCN: {{buffer|flat}}_load_dword
; GCN: v_ffbh_u32_e32
; GCN: v_cmp
; GCN: v_cndmask
; GCN: buffer_store_dword
define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32, i32 addrspace(1)* %valptr
define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
%cmp = icmp eq i32 %val, 0
%sel = select i1 %cmp, i32 0, i32 %ctlz
@ -221,13 +237,15 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8_sel_eq_neg1(i8 addrspace(1)* noa
; Selected on wrong constant
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_ne_0:
; GCN: buffer_load_dword
; GCN: {{buffer|flat}}_load_dword
; GCN: v_ffbh_u32_e32
; GCN: v_cmp
; GCN: v_cndmask
; GCN: buffer_store_dword
define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32, i32 addrspace(1)* %valptr
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
%cmp = icmp ne i32 %val, 0
%sel = select i1 %cmp, i32 %ctlz, i32 0
@ -237,13 +255,15 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_0(i32 addrspace(1)* noal
; Compare on wrong constant
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_eq_cmp_non0:
; GCN: buffer_load_dword
; GCN: {{buffer|flat}}_load_dword
; GCN: v_ffbh_u32_e32
; GCN: v_cmp
; GCN: v_cndmask
; GCN: buffer_store_dword
define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_cmp_non0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32, i32 addrspace(1)* %valptr
define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_cmp_non0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
%cmp = icmp eq i32 %val, 1
%sel = select i1 %cmp, i32 0, i32 %ctlz
@ -253,13 +273,15 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_0(i32 addrspace(1)* noal
; Selected on wrong constant
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_ne_cmp_non0:
; GCN: buffer_load_dword
; GCN: {{buffer|flat}}_load_dword
; GCN: v_ffbh_u32_e32
; GCN: v_cmp
; GCN: v_cndmask
; GCN: buffer_store_dword
define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_cmp_non0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32, i32 addrspace(1)* %valptr
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
%cmp = icmp ne i32 %val, 1
%sel = select i1 %cmp, i32 %ctlz, i32 0

View File

@ -8,6 +8,8 @@ declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) nounwind readnone
declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) nounwind readnone
declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) nounwind readnone
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; FUNC-LABEL: {{^}}s_ctpop_i32:
; GCN: s_load_dword [[SVAL:s[0-9]+]],
; GCN: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[SVAL]]
@ -24,22 +26,24 @@ define amdgpu_kernel void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val)
; XXX - Why 0 in register?
; FUNC-LABEL: {{^}}v_ctpop_i32:
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
; GCN: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]],
; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], 0
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%val = load i32, i32 addrspace(1)* %in, align 4
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
store i32 %ctpop, i32 addrspace(1)* %out, align 4
ret void
}
; FUNC-LABEL: {{^}}v_ctpop_add_chain_i32:
; GCN: buffer_load_dword [[VAL1:v[0-9]+]],
; GCN: buffer_load_dword [[VAL0:v[0-9]+]],
; GCN: {{buffer|flat}}_load_dword [[VAL0:v[0-9]+]],
; GCN: {{buffer|flat}}_load_dword [[VAL1:v[0-9]+]],
; GCN: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT:v[0-9]+]], [[VAL1]], 0
; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
; VI: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
@ -49,8 +53,11 @@ define amdgpu_kernel void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrs
; EG: BCNT_INT
; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1) nounwind {
%val0 = load i32, i32 addrspace(1)* %in0, align 4
%val1 = load i32, i32 addrspace(1)* %in1, align 4
%tid = call i32 @llvm.r600.read.tidig.x()
%in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %tid
%in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %tid
%val0 = load i32, i32 addrspace(1)* %in0.gep, align 4
%val1 = load i32, i32 addrspace(1)* %in1.gep, align 4
%ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
%ctpop1 = call i32 @llvm.ctpop.i32(i32 %val1) nounwind readnone
%add = add i32 %ctpop0, %ctpop1
@ -59,15 +66,17 @@ define amdgpu_kernel void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out,
}
; FUNC-LABEL: {{^}}v_ctpop_add_sgpr_i32:
; GCN: buffer_load_dword [[VAL0:v[0-9]+]],
; GCN: {{buffer|flat}}_load_dword [[VAL0:v[0-9]+]],
; GCN: s_waitcnt
; GCN-NEXT: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1, i32 %sval) nounwind {
%val0 = load i32, i32 addrspace(1)* %in0, align 4
%ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
%add = add i32 %ctpop0, %sval
define amdgpu_kernel void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %sval) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
%add = add i32 %ctpop, %sval
store i32 %add, i32 addrspace(1)* %out, align 4
ret void
}
@ -80,7 +89,9 @@ define amdgpu_kernel void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out,
; EG: BCNT_INT
; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) nounwind {
%val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 %tid
%val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8
%ctpop = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %val) nounwind readnone
store <2 x i32> %ctpop, <2 x i32> addrspace(1)* %out, align 8
ret void
@ -98,7 +109,9 @@ define amdgpu_kernel void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <
; EG: BCNT_INT
; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %in) nounwind {
%val = load <4 x i32>, <4 x i32> addrspace(1)* %in, align 16
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 %tid
%val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16
%ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val) nounwind readnone
store <4 x i32> %ctpop, <4 x i32> addrspace(1)* %out, align 16
ret void
@ -124,7 +137,9 @@ define amdgpu_kernel void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <
; EG: BCNT_INT
; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrspace(1)* noalias %in) nounwind {
%val = load <8 x i32>, <8 x i32> addrspace(1)* %in, align 32
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr <8 x i32>, <8 x i32> addrspace(1)* %in, i32 %tid
%val = load <8 x i32>, <8 x i32> addrspace(1)* %in.gep, align 32
%ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %val) nounwind readnone
store <8 x i32> %ctpop, <8 x i32> addrspace(1)* %out, align 32
ret void
@ -166,21 +181,25 @@ define amdgpu_kernel void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <
; EG: BCNT_INT
; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> addrspace(1)* noalias %in) nounwind {
%val = load <16 x i32>, <16 x i32> addrspace(1)* %in, align 32
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr <16 x i32>, <16 x i32> addrspace(1)* %in, i32 %tid
%val = load <16 x i32>, <16 x i32> addrspace(1)* %in.gep, align 32
%ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %val) nounwind readnone
store <16 x i32> %ctpop, <16 x i32> addrspace(1)* %out, align 32
ret void
}
; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant:
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
; GCN: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]],
; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], 4
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%val = load i32, i32 addrspace(1)* %in, align 4
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
%add = add i32 %ctpop, 4
store i32 %add, i32 addrspace(1)* %out, align 4
@ -188,14 +207,16 @@ define amdgpu_kernel void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noa
}
; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant_inv:
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
; GCN: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]],
; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], 4
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%val = load i32, i32 addrspace(1)* %in, align 4
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
%add = add i32 4, %ctpop
store i32 %add, i32 addrspace(1)* %out, align 4
@ -203,14 +224,16 @@ define amdgpu_kernel void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)*
}
; FUNC-LABEL: {{^}}v_ctpop_i32_add_literal:
; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]],
; GCN-DAG: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]],
; GCN-DAG: v_mov_b32_e32 [[LIT:v[0-9]+]], 0x1869f
; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
; VI: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%val = load i32, i32 addrspace(1)* %in, align 4
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
%add = add i32 %ctpop, 99999
store i32 %add, i32 addrspace(1)* %out, align 4
@ -218,7 +241,7 @@ define amdgpu_kernel void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %ou
}
; FUNC-LABEL: {{^}}v_ctpop_i32_add_var:
; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]],
; GCN-DAG: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]],
; GCN-DAG: s_load_dword [[VAR:s[0-9]+]],
; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; GCN: buffer_store_dword [[RESULT]],
@ -226,7 +249,9 @@ define amdgpu_kernel void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %ou
; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
%val = load i32, i32 addrspace(1)* %in, align 4
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
%add = add i32 %ctpop, %const
store i32 %add, i32 addrspace(1)* %out, align 4
@ -234,7 +259,7 @@ define amdgpu_kernel void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i
}
; FUNC-LABEL: {{^}}v_ctpop_i32_add_var_inv:
; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]],
; GCN-DAG: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]],
; GCN-DAG: s_load_dword [[VAR:s[0-9]+]],
; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; GCN: buffer_store_dword [[RESULT]],
@ -242,7 +267,9 @@ define amdgpu_kernel void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i
; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
%val = load i32, i32 addrspace(1)* %in, align 4
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
%add = add i32 %const, %ctpop
store i32 %add, i32 addrspace(1)* %out, align 4
@ -250,18 +277,22 @@ define amdgpu_kernel void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %ou
}
; FUNC-LABEL: {{^}}v_ctpop_i32_add_vvar_inv:
; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], {{0$}}
; GCN-DAG: buffer_load_dword [[VAR:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:16
; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; SI: buffer_load_dword [[VAR:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64
; SI: buffer_load_dword [[VAL:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64
; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAR]], [[VAL]]
; VI: flat_load_dword [[VAL:v[0-9]+]], v[{{[0-9]+:[0-9]+}}]
; VI: flat_load_dword [[VAR:v[0-9]+]], v[{{[0-9]+:[0-9]+}}]
; VI: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind {
%val = load i32, i32 addrspace(1)* %in, align 4
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
%gep = getelementptr i32, i32 addrspace(1)* %constptr, i32 4
%gep = getelementptr i32, i32 addrspace(1)* %constptr, i32 %tid
%const = load i32, i32 addrspace(1)* %gep, align 4
%add = add i32 %const, %ctpop
store i32 %add, i32 addrspace(1)* %out, align 4

View File

@ -1,6 +1,8 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
declare i64 @llvm.ctpop.i64(i64) nounwind readnone
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) nounwind readnone
declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>) nounwind readnone
@ -25,14 +27,16 @@ define amdgpu_kernel void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val)
}
; FUNC-LABEL: {{^}}v_ctpop_i64:
; GCN: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
; GCN: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0
; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
; VI-NEXT: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
%val = load i64, i64 addrspace(1)* %in, align 8
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%val = load i64, i64 addrspace(1)* %in.gep, align 8
%ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
%truncctpop = trunc i64 %ctpop to i32
store i32 %truncctpop, i32 addrspace(1)* %out, align 4
@ -40,7 +44,7 @@ define amdgpu_kernel void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrs
}
; FUNC-LABEL: {{^}}v_ctpop_i64_user:
; GCN: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
; GCN: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0
; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
; VI-NEXT: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
@ -49,7 +53,9 @@ define amdgpu_kernel void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrs
; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
; GCN: s_endpgm
define amdgpu_kernel void @v_ctpop_i64_user(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %s.val) nounwind {
%val = load i64, i64 addrspace(1)* %in, align 8
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%val = load i64, i64 addrspace(1)* %in.gep, align 8
%ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
%or = or i64 %ctpop, %s.val
store i64 %or, i64 addrspace(1)* %out
@ -87,7 +93,9 @@ define amdgpu_kernel void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <
; GCN: v_bcnt_u32_b32
; GCN: s_endpgm
define amdgpu_kernel void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in) nounwind {
%val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i32 %tid
%val = load <2 x i64>, <2 x i64> addrspace(1)* %in.gep, align 16
%ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone
%truncctpop = trunc <2 x i64> %ctpop to <2 x i32>
store <2 x i32> %truncctpop, <2 x i32> addrspace(1)* %out, align 8
@ -105,7 +113,9 @@ define amdgpu_kernel void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <
; GCN: v_bcnt_u32_b32
; GCN: s_endpgm
define amdgpu_kernel void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrspace(1)* noalias %in) nounwind {
%val = load <4 x i64>, <4 x i64> addrspace(1)* %in, align 32
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid
%val = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep, align 32
%ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone
%truncctpop = trunc <4 x i64> %ctpop to <4 x i32>
store <4 x i32> %truncctpop, <4 x i32> addrspace(1)* %out, align 16
@ -169,7 +179,8 @@ define amdgpu_kernel void @s_ctpop_i65(i32 addrspace(1)* noalias %out, i65 %val)
; FIXME: Should not have extra add
; FUNC-LABEL: {{^}}v_ctpop_i128:
; GCN: buffer_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; SI: buffer_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
; VI: flat_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}
; GCN-DAG: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT0:v[0-9]+]], v{{[0-9]+}}, 0
; GCN-DAG: v_bcnt_u32_b32{{(_e32)*(_e64)*}} [[MIDRESULT1:v[0-9]+]], v[[VAL3]], [[MIDRESULT0]]
@ -182,7 +193,9 @@ define amdgpu_kernel void @s_ctpop_i65(i32 addrspace(1)* noalias %out, i65 %val)
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @v_ctpop_i128(i32 addrspace(1)* noalias %out, i128 addrspace(1)* noalias %in) nounwind {
%val = load i128, i128 addrspace(1)* %in, align 8
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %tid
%val = load i128, i128 addrspace(1)* %in.gep, align 8
%ctpop = call i128 @llvm.ctpop.i128(i128 %val) nounwind readnone
%truncctpop = trunc i128 %ctpop to i32
store i32 %truncctpop, i32 addrspace(1)* %out, align 4

View File

@ -5,6 +5,7 @@
declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) nounwind readnone
declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1) nounwind readnone
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; FUNC-LABEL: {{^}}s_cttz_zero_undef_i32:
; SI: s_load_dword [[VAL:s[0-9]+]],
@ -21,21 +22,23 @@ define amdgpu_kernel void @s_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out,
}
; FUNC-LABEL: {{^}}v_cttz_zero_undef_i32:
; SI: buffer_load_dword [[VAL:v[0-9]+]],
; SI: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]],
; SI: v_ffbl_b32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: FFBL_INT {{\*? *}}[[RESULT]]
define amdgpu_kernel void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32, i32 addrspace(1)* %valptr, align 4
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep, align 4
%cttz = call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone
store i32 %cttz, i32 addrspace(1)* %out, align 4
ret void
}
; FUNC-LABEL: {{^}}v_cttz_zero_undef_v2i32:
; SI: buffer_load_dwordx2
; SI: {{buffer|flat}}_load_dwordx2
; SI: v_ffbl_b32_e32
; SI: v_ffbl_b32_e32
; SI: buffer_store_dwordx2
@ -44,14 +47,16 @@ define amdgpu_kernel void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out,
; EG: FFBL_INT {{\*? *}}[[RESULT]]
; EG: FFBL_INT {{\*? *}}[[RESULT]]
define amdgpu_kernel void @v_cttz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
%val = load <2 x i32>, <2 x i32> addrspace(1)* %valptr, align 8
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %valptr, i32 %tid
%val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %val, i1 true) nounwind readnone
store <2 x i32> %cttz, <2 x i32> addrspace(1)* %out, align 8
ret void
}
; FUNC-LABEL: {{^}}v_cttz_zero_undef_v4i32:
; SI: buffer_load_dwordx4
; SI: {{buffer|flat}}_load_dwordx4
; SI: v_ffbl_b32_e32
; SI: v_ffbl_b32_e32
; SI: v_ffbl_b32_e32
@ -64,7 +69,9 @@ define amdgpu_kernel void @v_cttz_zero_undef_v2i32(<2 x i32> addrspace(1)* noali
; EG: FFBL_INT {{\*? *}}[[RESULT]]
; EG: FFBL_INT {{\*? *}}[[RESULT]]
define amdgpu_kernel void @v_cttz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
%val = load <4 x i32>, <4 x i32> addrspace(1)* %valptr, align 16
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %valptr, i32 %tid
%val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16
%cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %val, i1 true) nounwind readnone
store <4 x i32> %cttz, <4 x i32> addrspace(1)* %out, align 16
ret void

View File

@ -5,46 +5,52 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
; GCN-LABEL: {{^}}load_i8_to_f32:
; GCN: buffer_load_ubyte [[LOADREG:v[0-9]+]],
; GCN: {{buffer|flat}}_load_ubyte [[LOADREG:v[0-9]+]],
; GCN-NOT: bfe
; GCN-NOT: lshr
; GCN: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[LOADREG]]
; GCN: buffer_store_dword [[CONV]],
define amdgpu_kernel void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
%load = load i8, i8 addrspace(1)* %in, align 1
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr i8, i8 addrspace(1)* %in, i32 %tid
%load = load i8, i8 addrspace(1)* %gep, align 1
%cvt = uitofp i8 %load to float
store float %cvt, float addrspace(1)* %out, align 4
ret void
}
; GCN-LABEL: {{^}}load_v2i8_to_v2f32:
; GCN: buffer_load_ushort [[LD:v[0-9]+]]
; GCN: {{buffer|flat}}_load_ushort [[LD:v[0-9]+]]
; GCN-DAG: v_cvt_f32_ubyte1_e32 v[[HIRESULT:[0-9]+]], [[LD]]
; GCN-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LD]]
; GCN: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define amdgpu_kernel void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <2 x i8>, <2 x i8> addrspace(1)* %in, align 2
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr <2 x i8>, <2 x i8> addrspace(1)* %in, i32 %tid
%load = load <2 x i8>, <2 x i8> addrspace(1)* %gep, align 2
%cvt = uitofp <2 x i8> %load to <2 x float>
store <2 x float> %cvt, <2 x float> addrspace(1)* %out, align 16
ret void
}
; GCN-LABEL: {{^}}load_v3i8_to_v3f32:
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]]
; GCN-NOT: v_cvt_f32_ubyte3_e32
; GCN-DAG: v_cvt_f32_ubyte2_e32 v{{[0-9]+}}, [[VAL]]
; GCN-DAG: v_cvt_f32_ubyte1_e32 v[[HIRESULT:[0-9]+]], [[VAL]]
; GCN-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[VAL]]
; GCN: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define amdgpu_kernel void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <3 x i8>, <3 x i8> addrspace(1)* %in, align 4
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %in, i32 %tid
%load = load <3 x i8>, <3 x i8> addrspace(1)* %gep, align 4
%cvt = uitofp <3 x i8> %load to <3 x float>
store <3 x float> %cvt, <3 x float> addrspace(1)* %out, align 16
ret void
}
; GCN-LABEL: {{^}}load_v4i8_to_v4f32:
; GCN: buffer_load_dword [[LOADREG:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[LOADREG:v[0-9]+]]
; GCN-NOT: bfe
; GCN-NOT: lshr
; GCN-DAG: v_cvt_f32_ubyte3_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
@ -53,7 +59,9 @@ define amdgpu_kernel void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias
; GCN-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
; GCN: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define amdgpu_kernel void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid
%load = load <4 x i8>, <4 x i8> addrspace(1)* %gep, align 4
%cvt = uitofp <4 x i8> %load to <4 x float>
store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
ret void
@ -64,10 +72,10 @@ define amdgpu_kernel void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias
; FIXME: Packing bytes
; GCN-LABEL: {{^}}load_v4i8_to_v4f32_unaligned:
; GCN: buffer_load_ubyte [[LOADREG3:v[0-9]+]]
; GCN: buffer_load_ubyte [[LOADREG2:v[0-9]+]]
; GCN: buffer_load_ubyte [[LOADREG1:v[0-9]+]]
; GCN: buffer_load_ubyte [[LOADREG0:v[0-9]+]]
; GCN: {{buffer|flat}}_load_ubyte [[LOADREG3:v[0-9]+]]
; GCN: {{buffer|flat}}_load_ubyte [[LOADREG2:v[0-9]+]]
; GCN: {{buffer|flat}}_load_ubyte [[LOADREG1:v[0-9]+]]
; GCN: {{buffer|flat}}_load_ubyte [[LOADREG0:v[0-9]+]]
; GCN-DAG: v_lshlrev_b32
; GCN-DAG: v_or_b32
; GCN-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]],
@ -77,7 +85,9 @@ define amdgpu_kernel void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias
; GCN: buffer_store_dwordx4
define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 1
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid
%load = load <4 x i8>, <4 x i8> addrspace(1)* %gep, align 1
%cvt = uitofp <4 x i8> %load to <4 x float>
store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
ret void
@ -124,14 +134,16 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* n
; GCN-LABEL: {{^}}load_v7i8_to_v7f32:
; GCN: s_endpgm
define amdgpu_kernel void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <7 x i8>, <7 x i8> addrspace(1)* %in, align 1
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr <7 x i8>, <7 x i8> addrspace(1)* %in, i32 %tid
%load = load <7 x i8>, <7 x i8> addrspace(1)* %gep, align 1
%cvt = uitofp <7 x i8> %load to <7 x float>
store <7 x float> %cvt, <7 x float> addrspace(1)* %out, align 16
ret void
}
; GCN-LABEL: {{^}}load_v8i8_to_v8f32:
; GCN: buffer_load_dwordx2 v{{\[}}[[LOLOAD:[0-9]+]]:[[HILOAD:[0-9]+]]{{\]}},
; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LOLOAD:[0-9]+]]:[[HILOAD:[0-9]+]]{{\]}},
; GCN-NOT: bfe
; GCN-NOT: lshr
; GCN-DAG: v_cvt_f32_ubyte3_e32 v{{[0-9]+}}, v[[LOLOAD]]
@ -147,19 +159,23 @@ define amdgpu_kernel void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
define amdgpu_kernel void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <8 x i8>, <8 x i8> addrspace(1)* %in, align 8
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr <8 x i8>, <8 x i8> addrspace(1)* %in, i32 %tid
%load = load <8 x i8>, <8 x i8> addrspace(1)* %gep, align 8
%cvt = uitofp <8 x i8> %load to <8 x float>
store <8 x float> %cvt, <8 x float> addrspace(1)* %out, align 16
ret void
}
; GCN-LABEL: {{^}}i8_zext_inreg_i32_to_f32:
; GCN: buffer_load_dword [[LOADREG:v[0-9]+]],
; GCN: {{buffer|flat}}_load_dword [[LOADREG:v[0-9]+]],
; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 2, [[LOADREG]]
; GCN-NEXT: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[ADD]]
; GCN: buffer_store_dword [[CONV]],
define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%load = load i32, i32 addrspace(1)* %in, align 4
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%load = load i32, i32 addrspace(1)* %gep, align 4
%add = add i32 %load, 2
%inreg = and i32 %add, 255
%cvt = uitofp i32 %inreg to float
@ -169,7 +185,9 @@ define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias
; GCN-LABEL: {{^}}i8_zext_inreg_hi1_to_f32:
define amdgpu_kernel void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%load = load i32, i32 addrspace(1)* %in, align 4
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%load = load i32, i32 addrspace(1)* %gep, align 4
%inreg = and i32 %load, 65280
%shr = lshr i32 %inreg, 8
%cvt = uitofp i32 %shr to float
@ -181,7 +199,9 @@ define amdgpu_kernel void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias
; them so it shouldn't really matter.
; GCN-LABEL: {{^}}i8_zext_i32_to_f32:
define amdgpu_kernel void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
%load = load i8, i8 addrspace(1)* %in, align 1
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr i8, i8 addrspace(1)* %in, i32 %tid
%load = load i8, i8 addrspace(1)* %gep, align 1
%ext = zext i8 %load to i32
%cvt = uitofp i32 %ext to float
store float %cvt, float addrspace(1)* %out, align 4
@ -190,7 +210,9 @@ define amdgpu_kernel void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out,
; GCN-LABEL: {{^}}v4i8_zext_v4i32_to_v4f32:
define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 1
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid
%load = load <4 x i8>, <4 x i8> addrspace(1)* %gep, align 1
%ext = zext <4 x i8> %load to <4 x i32>
%cvt = uitofp <4 x i32> %ext to <4 x float>
store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
@ -198,12 +220,14 @@ define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* no
}
; GCN-LABEL: {{^}}extract_byte0_to_f32:
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]]
; GCN-NOT: [[VAL]]
; GCN: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[CONV]]
define amdgpu_kernel void @extract_byte0_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%val = load i32, i32 addrspace(1)* %in
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%val = load i32, i32 addrspace(1)* %gep
%and = and i32 %val, 255
%cvt = uitofp i32 %and to float
store float %cvt, float addrspace(1)* %out
@ -211,12 +235,14 @@ define amdgpu_kernel void @extract_byte0_to_f32(float addrspace(1)* noalias %out
}
; GCN-LABEL: {{^}}extract_byte1_to_f32:
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]]
; GCN-NOT: [[VAL]]
; GCN: v_cvt_f32_ubyte1_e32 [[CONV:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[CONV]]
define amdgpu_kernel void @extract_byte1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%val = load i32, i32 addrspace(1)* %in
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%val = load i32, i32 addrspace(1)* %gep
%srl = lshr i32 %val, 8
%and = and i32 %srl, 255
%cvt = uitofp i32 %and to float
@ -225,12 +251,14 @@ define amdgpu_kernel void @extract_byte1_to_f32(float addrspace(1)* noalias %out
}
; GCN-LABEL: {{^}}extract_byte2_to_f32:
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]]
; GCN-NOT: [[VAL]]
; GCN: v_cvt_f32_ubyte2_e32 [[CONV:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[CONV]]
define amdgpu_kernel void @extract_byte2_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%val = load i32, i32 addrspace(1)* %in
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%val = load i32, i32 addrspace(1)* %gep
%srl = lshr i32 %val, 16
%and = and i32 %srl, 255
%cvt = uitofp i32 %and to float
@ -239,12 +267,14 @@ define amdgpu_kernel void @extract_byte2_to_f32(float addrspace(1)* noalias %out
}
; GCN-LABEL: {{^}}extract_byte3_to_f32:
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]]
; GCN-NOT: [[VAL]]
; GCN: v_cvt_f32_ubyte3_e32 [[CONV:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[CONV]]
define amdgpu_kernel void @extract_byte3_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%val = load i32, i32 addrspace(1)* %in
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%val = load i32, i32 addrspace(1)* %gep
%srl = lshr i32 %val, 24
%and = and i32 %srl, 255
%cvt = uitofp i32 %and to float

View File

@ -1,4 +1,4 @@
; RUN: llc -stress-early-ifcvt -amdgpu-early-ifcvt=1 -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -amdgpu-scalarize-global-loads=false -stress-early-ifcvt -amdgpu-early-ifcvt=1 -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; FIXME: Most of these cases that don't trigger because of broken cost
; heuristics. Should not need -stress-early-ifcvt

View File

@ -1,4 +1,4 @@
; RUN: llc -march=amdgcn -mcpu=verde -amdgpu-early-ifcvt=1 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -amdgpu-early-ifcvt=1 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; XUN: llc -march=amdgcn -mcpu=tonga -amdgpu-early-ifcvt=1 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; FIXME: This leaves behind a now unnecessary and with exec

View File

@ -2,6 +2,8 @@
; RUN: llc -march=amdgcn -enable-no-signed-zeros-fp-math=1 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-UNSAFE %s
; RUN: llc -march=amdgcn -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-UNSAFE %s
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; Test that the -enable-no-signed-zeros-fp-math flag works
; GCN-LABEL: {{^}}fneg_fsub_f32:
@ -10,8 +12,11 @@
; GCN-UNSAFE-NOT: xor
define amdgpu_kernel void @fneg_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
%a = load float, float addrspace(1)* %in, align 4
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%add = add i32 %tid, 1
%gep = getelementptr float, float addrspace(1)* %in, i32 %tid
%b_ptr = getelementptr float, float addrspace(1)* %in, i32 %add
%a = load float, float addrspace(1)* %gep, align 4
%b = load float, float addrspace(1)* %b_ptr, align 4
%result = fsub float %a, %b
%neg.result = fsub float -0.0, %result

View File

@ -1,5 +1,7 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; Make sure the add and load are reduced to 32-bits even with the
; bitcast to vector.
; GCN-LABEL: {{^}}bitcast_int_to_vector_extract_0:
@ -8,7 +10,9 @@
; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, [[B]], [[A]]
; GCN: buffer_store_dword [[ADD]]
define amdgpu_kernel void @bitcast_int_to_vector_extract_0(i32 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %b) {
%a = load i64, i64 addrspace(1)* %in
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%a = load i64, i64 addrspace(1)* %gep
%add = add i64 %a, %b
%val.bc = bitcast i64 %add to <2 x i32>
%extract = extractelement <2 x i32> %val.bc, i32 0
@ -21,7 +25,9 @@ define amdgpu_kernel void @bitcast_int_to_vector_extract_0(i32 addrspace(1)* %ou
; GCN: v_add_f64
; GCN: buffer_store_dword v
define amdgpu_kernel void @bitcast_fp_to_vector_extract_0(i32 addrspace(1)* %out, double addrspace(1)* %in, double %b) {
%a = load double, double addrspace(1)* %in
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr double, double addrspace(1)* %in, i32 %tid
%a = load double, double addrspace(1)* %gep
%add = fadd double %a, %b
%val.bc = bitcast double %add to <2 x i32>
%extract = extractelement <2 x i32> %val.bc, i32 0
@ -34,7 +40,9 @@ define amdgpu_kernel void @bitcast_fp_to_vector_extract_0(i32 addrspace(1)* %out
; GCN: v_add_i32
; GCN: buffer_store_dword
define amdgpu_kernel void @bitcast_int_to_fpvector_extract_0(float addrspace(1)* %out, i64 addrspace(1)* %in, i64 %b) {
%a = load i64, i64 addrspace(1)* %in
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%a = load i64, i64 addrspace(1)* %gep
%add = add i64 %a, %b
%val.bc = bitcast i64 %add to <2 x float>
%extract = extractelement <2 x float> %val.bc, i32 0

View File

@ -134,7 +134,9 @@ define amdgpu_kernel void @fabs_free_v2f16(<2 x half> addrspace(1)* %out, i32 %i
; GFX9: v_and_b32_e32 [[FABS:v[0-9]+]], 0x7fff7fff, [[VAL]]
; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[FABS]], v{{[0-9]+$}}
define amdgpu_kernel void @v_fabs_fold_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
%val = load <2 x half>, <2 x half> addrspace(1)* %in
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %in, i32 %tid
%val = load <2 x half>, <2 x half> addrspace(1)* %gep
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
%fmul = fmul <2 x half> %fabs, %val
store <2 x half> %fmul, <2 x half> addrspace(1)* %out

View File

@ -2,8 +2,8 @@
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; GCN-LABEL: {{^}}fadd_f16
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: {{buffer|flat}}_load_ushort v[[A_F16:[0-9]+]]
; GCN: {{buffer|flat}}_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_add_f32_e32 v[[R_F32:[0-9]+]], v[[B_F32]], v[[A_F32]]
@ -24,7 +24,7 @@ entry:
}
; GCN-LABEL: {{^}}fadd_f16_imm_a
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: {{buffer|flat}}_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_add_f32_e32 v[[R_F32:[0-9]+]], 1.0, v[[B_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
@ -42,7 +42,7 @@ entry:
}
; GCN-LABEL: {{^}}fadd_f16_imm_b
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: {{buffer|flat}}_load_ushort v[[A_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_add_f32_e32 v[[R_F32:[0-9]+]], 2.0, v[[A_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
@ -60,8 +60,8 @@ entry:
}
; GCN-LABEL: {{^}}fadd_v2f16:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; GCN: {{buffer|flat}}_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: {{buffer|flat}}_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
@ -70,8 +70,8 @@ entry:
; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI-DAG: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_add_f32_e32 v[[R_F32_0:[0-9]+]], v[[B_F32_0]], v[[A_F32_0]]
; SI: v_add_f32_e32 v[[R_F32_1:[0-9]+]], v[[B_F32_1]], v[[A_F32_1]]
; SI-DAG: v_add_f32_e32 v[[R_F32_0:[0-9]+]], v[[B_F32_0]], v[[A_F32_0]]
; SI-DAG: v_add_f32_e32 v[[R_F32_1:[0-9]+]], v[[B_F32_1]], v[[A_F32_1]]
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]]
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; SI: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
@ -88,15 +88,18 @@ define amdgpu_kernel void @fadd_v2f16(
<2 x half> addrspace(1)* %a,
<2 x half> addrspace(1)* %b) {
entry:
%a.val = load <2 x half>, <2 x half> addrspace(1)* %a
%b.val = load <2 x half>, <2 x half> addrspace(1)* %b
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.a = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %a, i32 %tid
%gep.b = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %b, i32 %tid
%a.val = load <2 x half>, <2 x half> addrspace(1)* %gep.a
%b.val = load <2 x half>, <2 x half> addrspace(1)* %gep.b
%r.val = fadd <2 x half> %a.val, %b.val
store <2 x half> %r.val, <2 x half> addrspace(1)* %r
ret void
}
; GCN-LABEL: {{^}}fadd_v2f16_imm_a:
; GCN-DAG: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; GCN-DAG: {{buffer|flat}}_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; SI: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
@ -118,14 +121,16 @@ define amdgpu_kernel void @fadd_v2f16_imm_a(
<2 x half> addrspace(1)* %r,
<2 x half> addrspace(1)* %b) {
entry:
%b.val = load <2 x half>, <2 x half> addrspace(1)* %b
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.b = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %b, i32 %tid
%b.val = load <2 x half>, <2 x half> addrspace(1)* %gep.b
%r.val = fadd <2 x half> <half 1.0, half 2.0>, %b.val
store <2 x half> %r.val, <2 x half> addrspace(1)* %r
ret void
}
; GCN-LABEL: {{^}}fadd_v2f16_imm_b:
; GCN-DAG: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN-DAG: {{buffer|flat}}_load_dword v[[A_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; SI-DAG: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
@ -147,8 +152,15 @@ define amdgpu_kernel void @fadd_v2f16_imm_b(
<2 x half> addrspace(1)* %r,
<2 x half> addrspace(1)* %a) {
entry:
%a.val = load <2 x half>, <2 x half> addrspace(1)* %a
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.a = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %a, i32 %tid
%a.val = load <2 x half>, <2 x half> addrspace(1)* %gep.a
%r.val = fadd <2 x half> %a.val, <half 2.0, half 1.0>
store <2 x half> %r.val, <2 x half> addrspace(1)* %r
ret void
}
declare i32 @llvm.amdgcn.workitem.id.x() #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }

View File

@ -5,8 +5,11 @@
; CHECK: v_add_f64 {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}
define amdgpu_kernel void @v_fadd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
%r0 = load double, double addrspace(1)* %in1
%r1 = load double, double addrspace(1)* %in2
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep1 = getelementptr inbounds double, double addrspace(1)* %in1, i32 %tid
%gep2 = getelementptr inbounds double, double addrspace(1)* %in2, i32 %tid
%r0 = load double, double addrspace(1)* %gep1
%r1 = load double, double addrspace(1)* %gep2
%r2 = fadd double %r0, %r1
store double %r2, double addrspace(1)* %out
ret void
@ -42,3 +45,8 @@ define amdgpu_kernel void @s_fadd_v2f64(<2 x double> addrspace(1)* %out, <2 x do
store <2 x double> %r2, <2 x double> addrspace(1)* %out
ret void
}
declare i32 @llvm.amdgcn.workitem.id.x() #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }

View File

@ -5,6 +5,8 @@ declare half @llvm.fabs.f16(half) #0
declare half @llvm.canonicalize.f16(half) #0
declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #0
declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
declare i32 @llvm.amdgcn.workitem.id.x() #0
; GCN-LABEL: {{^}}v_test_canonicalize_var_f16:
; GCN: v_mul_f16_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
@ -213,7 +215,9 @@ define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f16(half addrspace
; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, {{v[0-9]+$}}
; GFX9: buffer_store_dword [[REG]]
define amdgpu_kernel void @v_test_canonicalize_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
%val = load <2 x half>, <2 x half> addrspace(1)* %out
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
%val = load <2 x half>, <2 x half> addrspace(1)* %gep
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val)
store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
ret void
@ -233,7 +237,9 @@ define amdgpu_kernel void @v_test_canonicalize_var_v2f16(<2 x half> addrspace(1)
; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, [[ABS]]{{$}}
; GCN: buffer_store_dword
define amdgpu_kernel void @v_test_canonicalize_fabs_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
%val = load <2 x half>, <2 x half> addrspace(1)* %out
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
%val = load <2 x half>, <2 x half> addrspace(1)* %gep
%val.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val.fabs)
store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
@ -251,7 +257,9 @@ define amdgpu_kernel void @v_test_canonicalize_fabs_var_v2f16(<2 x half> addrspa
; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, [[ABS]] neg_lo:[0,1] neg_hi:[0,1]{{$}}
; GCN: buffer_store_dword
define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
%val = load <2 x half>, <2 x half> addrspace(1)* %out
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
%val = load <2 x half>, <2 x half> addrspace(1)* %gep
%val.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
%val.fabs.fneg = fsub <2 x half> <half -0.0, half -0.0>, %val.fabs
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val.fabs.fneg)
@ -270,7 +278,9 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_v2f16(<2 x half> ad
; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} neg_lo:[0,1] neg_hi:[0,1]{{$}}
; GFX9: buffer_store_dword [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
%val = load <2 x half>, <2 x half> addrspace(1)* %out
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
%val = load <2 x half>, <2 x half> addrspace(1)* %gep
%fneg.val = fsub <2 x half> <half -0.0, half -0.0>, %val
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %fneg.val)
store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out

View File

@ -1,4 +1,4 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
declare float @llvm.fabs.f32(float) #0
declare float @llvm.canonicalize.f32(float) #0

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; GCN-LABEL: {{^}}fcmp_f16_lt
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]

View File

@ -2,7 +2,7 @@
; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s
; CHECK-LABEL: {{^}}flt_f64:
; CHECK: v_cmp_nge_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
; CHECK: v_cmp_nge_f64_e32 vcc, {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
define amdgpu_kernel void @flt_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
%r0 = load double, double addrspace(1)* %in1
@ -14,7 +14,7 @@ define amdgpu_kernel void @flt_f64(i32 addrspace(1)* %out, double addrspace(1)*
}
; CHECK-LABEL: {{^}}fle_f64:
; CHECK: v_cmp_ngt_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
; CHECK: v_cmp_ngt_f64_e32 vcc, {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
define amdgpu_kernel void @fle_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
%r0 = load double, double addrspace(1)* %in1
@ -26,7 +26,7 @@ define amdgpu_kernel void @fle_f64(i32 addrspace(1)* %out, double addrspace(1)*
}
; CHECK-LABEL: {{^}}fgt_f64:
; CHECK: v_cmp_nle_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
; CHECK: v_cmp_nle_f64_e32 vcc, {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
define amdgpu_kernel void @fgt_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
%r0 = load double, double addrspace(1)* %in1
@ -38,7 +38,7 @@ define amdgpu_kernel void @fgt_f64(i32 addrspace(1)* %out, double addrspace(1)*
}
; CHECK-LABEL: {{^}}fge_f64:
; CHECK: v_cmp_nlt_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
; CHECK: v_cmp_nlt_f64_e32 vcc, {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
define amdgpu_kernel void @fge_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
%r0 = load double, double addrspace(1)* %in1
@ -50,7 +50,7 @@ define amdgpu_kernel void @fge_f64(i32 addrspace(1)* %out, double addrspace(1)*
}
; CHECK-LABEL: {{^}}fne_f64:
; CHECK: v_cmp_neq_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
; CHECK: v_cmp_neq_f64_e32 vcc, {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
define amdgpu_kernel void @fne_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
%r0 = load double, double addrspace(1)* %in1
@ -62,7 +62,7 @@ define amdgpu_kernel void @fne_f64(double addrspace(1)* %out, double addrspace(1
}
; CHECK-LABEL: {{^}}feq_f64:
; CHECK: v_cmp_nlg_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
; CHECK: v_cmp_nlg_f64_e32 vcc, {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
define amdgpu_kernel void @feq_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
%r0 = load double, double addrspace(1)* %in1

View File

@ -6,8 +6,15 @@
; CHECK-DAG: s_mov_b32 {{s[0-9]+}}, 0
define amdgpu_kernel void @fconst_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
%r1 = load double, double addrspace(1)* %in
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds double, double addrspace(1)* %in, i32 %tid
%r1 = load double, double addrspace(1)* %gep
%r2 = fadd double %r1, 5.000000e+00
store double %r2, double addrspace(1)* %out
ret void
}
declare i32 @llvm.amdgcn.workitem.id.x() #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }

View File

@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX8 %s
; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX8 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s
declare half @llvm.copysign.f16(half, half)
declare float @llvm.copysign.f32(float, float)
@ -9,16 +9,18 @@ declare <2 x half> @llvm.copysign.v2f16(<2 x half>, <2 x half>)
declare <3 x half> @llvm.copysign.v3f16(<3 x half>, <3 x half>)
declare <4 x half> @llvm.copysign.v4f16(<4 x half>, <4 x half>)
declare i32 @llvm.amdgcn.workitem.id.x()
; GCN-LABEL: {{^}}test_copysign_f16:
; SI: buffer_load_ushort v[[SIGN:[0-9]+]]
; SI: buffer_load_ushort v[[MAG:[0-9]+]]
; SI: {{buffer|flat}}_load_ushort v[[SIGN:[0-9]+]]
; SI: {{buffer|flat}}_load_ushort v[[MAG:[0-9]+]]
; SI: s_brev_b32 s[[CONST:[0-9]+]], -2
; SI-DAG: v_cvt_f32_f16_e32 v[[MAG_F32:[0-9]+]], v[[MAG]]
; SI-DAG: v_cvt_f32_f16_e32 v[[SIGN_F32:[0-9]+]], v[[SIGN]]
; SI: v_bfi_b32 v[[OUT_F32:[0-9]+]], s[[CONST]], v[[MAG_F32]], v[[SIGN_F32]]
; SI: v_cvt_f16_f32_e32 v[[OUT:[0-9]+]], v[[OUT_F32]]
; GFX89: buffer_load_ushort v[[SIGN:[0-9]+]]
; GFX89: buffer_load_ushort v[[MAG:[0-9]+]]
; GFX89: {{buffer|flat}}_load_ushort v[[SIGN:[0-9]+]]
; GFX89: {{buffer|flat}}_load_ushort v[[MAG:[0-9]+]]
; GFX89: s_movk_i32 s[[CONST:[0-9]+]], 0x7fff
; GFX89: v_bfi_b32 v[[OUT:[0-9]+]], s[[CONST]], v[[MAG]], v[[SIGN]]
; GCN: buffer_store_short v[[OUT]]
@ -36,8 +38,8 @@ entry:
}
; GCN-LABEL: {{^}}test_copysign_out_f32_mag_f16_sign_f32:
; GCN-DAG: buffer_load_ushort v[[MAG:[0-9]+]]
; GCN-DAG: buffer_load_dword v[[SIGN:[0-9]+]]
; GCN-DAG: {{buffer|flat}}_load_ushort v[[MAG:[0-9]+]]
; GCN-DAG: {{buffer|flat}}_load_dword v[[SIGN:[0-9]+]]
; GCN-DAG: s_brev_b32 s[[CONST:[0-9]+]], -2
; GCN-DAG: v_cvt_f32_f16_e32 v[[MAG_EXT:[0-9]+]], v[[MAG]]
; GCN: v_bfi_b32 v[[OUT:[0-9]+]], s[[CONST]], v[[MAG_EXT]], v[[SIGN]]
@ -48,17 +50,20 @@ define amdgpu_kernel void @test_copysign_out_f32_mag_f16_sign_f32(
half addrspace(1)* %arg_mag,
float addrspace(1)* %arg_sign) {
entry:
%mag = load half, half addrspace(1)* %arg_mag
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%arg_mag_gep = getelementptr half, half addrspace(1)* %arg_mag, i32 %tid
%mag = load half, half addrspace(1)* %arg_mag_gep
%mag.ext = fpext half %mag to float
%sign = load float, float addrspace(1)* %arg_sign
%arg_sign_gep = getelementptr float, float addrspace(1)* %arg_sign, i32 %tid
%sign = load float, float addrspace(1)* %arg_sign_gep
%out = call float @llvm.copysign.f32(float %mag.ext, float %sign)
store float %out, float addrspace(1)* %arg_out
ret void
}
; GCN-LABEL: {{^}}test_copysign_out_f64_mag_f16_sign_f64:
; GCN-DAG: buffer_load_ushort v[[MAG:[0-9]+]]
; GCN-DAG: buffer_load_dwordx2 v{{\[}}[[SIGN_LO:[0-9]+]]:[[SIGN_HI:[0-9]+]]{{\]}}
; GCN-DAG: {{buffer|flat}}_load_ushort v[[MAG:[0-9]+]]
; GCN-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[SIGN_LO:[0-9]+]]:[[SIGN_HI:[0-9]+]]{{\]}}
; GCN-DAG: s_brev_b32 s[[CONST:[0-9]+]], -2
; GCN-DAG: v_cvt_f32_f16_e32 v[[MAG_EXT:[0-9]+]], v[[MAG]]
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[MAG_EXT_LO:[0-9]+]]:[[MAG_EXT_HI:[0-9]+]]{{\]}}, v[[MAG_EXT]]
@ -70,17 +75,20 @@ define amdgpu_kernel void @test_copysign_out_f64_mag_f16_sign_f64(
half addrspace(1)* %arg_mag,
double addrspace(1)* %arg_sign) {
entry:
%mag = load half, half addrspace(1)* %arg_mag
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%arg_mag_gep = getelementptr half, half addrspace(1)* %arg_mag, i32 %tid
%mag = load half, half addrspace(1)* %arg_mag_gep
%mag.ext = fpext half %mag to double
%sign = load double, double addrspace(1)* %arg_sign
%arg_sign_gep = getelementptr double, double addrspace(1)* %arg_sign, i32 %tid
%sign = load double, double addrspace(1)* %arg_sign_gep
%out = call double @llvm.copysign.f64(double %mag.ext, double %sign)
store double %out, double addrspace(1)* %arg_out
ret void
}
; GCN-LABEL: {{^}}test_copysign_out_f32_mag_f32_sign_f16:
; GCN-DAG: buffer_load_dword v[[MAG:[0-9]+]]
; GCN-DAG: buffer_load_ushort v[[SIGN:[0-9]+]]
; GCN-DAG: {{buffer|flat}}_load_dword v[[MAG:[0-9]+]]
; GCN-DAG: {{buffer|flat}}_load_ushort v[[SIGN:[0-9]+]]
; GCN-DAG: s_brev_b32 s[[CONST:[0-9]+]], -2
; SI-DAG: v_cvt_f32_f16_e32 v[[SIGN_F32:[0-9]+]], v[[SIGN]]
; SI: v_bfi_b32 v[[OUT:[0-9]+]], s[[CONST]], v[[MAG]], v[[SIGN_F32]]
@ -93,8 +101,11 @@ define amdgpu_kernel void @test_copysign_out_f32_mag_f32_sign_f16(
float addrspace(1)* %arg_mag,
half addrspace(1)* %arg_sign) {
entry:
%mag = load float, float addrspace(1)* %arg_mag
%sign = load half, half addrspace(1)* %arg_sign
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%arg_mag_gep = getelementptr float, float addrspace(1)* %arg_mag, i32 %tid
%mag = load float, float addrspace(1)* %arg_mag_gep
%arg_sign_gep = getelementptr half, half addrspace(1)* %arg_sign, i32 %tid
%sign = load half, half addrspace(1)* %arg_sign_gep
%sign.ext = fpext half %sign to float
%out = call float @llvm.copysign.f32(float %mag, float %sign.ext)
store float %out, float addrspace(1)* %arg_out
@ -102,8 +113,8 @@ entry:
}
; GCN-LABEL: {{^}}test_copysign_out_f64_mag_f64_sign_f16:
; GCN-DAG: buffer_load_dwordx2 v{{\[}}[[MAG_LO:[0-9]+]]:[[MAG_HI:[0-9]+]]{{\]}}
; GCN-DAG: buffer_load_ushort v[[SIGN:[0-9]+]]
; GCN-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[MAG_LO:[0-9]+]]:[[MAG_HI:[0-9]+]]{{\]}}
; GCN-DAG: {{buffer|flat}}_load_ushort v[[SIGN:[0-9]+]]
; GCN-DAG: s_brev_b32 s[[CONST:[0-9]+]], -2
; SI-DAG: v_cvt_f32_f16_e32 v[[SIGN_F32:[0-9]+]], v[[SIGN]]
; SI: v_bfi_b32 v[[OUT_HI:[0-9]+]], s[[CONST]], v[[MAG_HI]], v[[SIGN_F32]]
@ -116,8 +127,11 @@ define amdgpu_kernel void @test_copysign_out_f64_mag_f64_sign_f16(
double addrspace(1)* %arg_mag,
half addrspace(1)* %arg_sign) {
entry:
%mag = load double, double addrspace(1)* %arg_mag
%sign = load half, half addrspace(1)* %arg_sign
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%arg_mag_gep = getelementptr double, double addrspace(1)* %arg_mag, i32 %tid
%mag = load double, double addrspace(1)* %arg_mag_gep
%arg_sign_gep = getelementptr half, half addrspace(1)* %arg_sign, i32 %tid
%sign = load half, half addrspace(1)* %arg_sign_gep
%sign.ext = fpext half %sign to double
%out = call double @llvm.copysign.f64(double %mag, double %sign.ext)
store double %out, double addrspace(1)* %arg_out
@ -125,8 +139,8 @@ entry:
}
; GCN-LABEL: {{^}}test_copysign_out_f16_mag_f16_sign_f32:
; GCN-DAG: buffer_load_ushort v[[MAG:[0-9]+]]
; GCN-DAG: buffer_load_dword v[[SIGN:[0-9]+]]
; GCN-DAG: {{buffer|flat}}_load_ushort v[[MAG:[0-9]+]]
; GCN-DAG: {{buffer|flat}}_load_dword v[[SIGN:[0-9]+]]
; SI-DAG: s_brev_b32 s[[CONST:[0-9]+]], -2
; SI-DAG: v_cvt_f32_f16_e32 v[[MAG_F32:[0-9]+]], v[[MAG]]
; SI: v_bfi_b32 v[[OUT_F32:[0-9]+]], s[[CONST]], v[[MAG_F32]], v[[SIGN]]
@ -141,8 +155,11 @@ define amdgpu_kernel void @test_copysign_out_f16_mag_f16_sign_f32(
half addrspace(1)* %arg_mag,
float addrspace(1)* %arg_sign) {
entry:
%mag = load half, half addrspace(1)* %arg_mag
%sign = load float, float addrspace(1)* %arg_sign
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%arg_mag_gep = getelementptr half, half addrspace(1)* %arg_mag, i32 %tid
%mag = load half, half addrspace(1)* %arg_mag_gep
%arg_sign_gep = getelementptr float, float addrspace(1)* %arg_sign, i32 %tid
%sign = load float, float addrspace(1)* %arg_sign_gep
%sign.trunc = fptrunc float %sign to half
%out = call half @llvm.copysign.f16(half %mag, half %sign.trunc)
store half %out, half addrspace(1)* %arg_out
@ -150,8 +167,8 @@ entry:
}
; GCN-LABEL: {{^}}test_copysign_out_f16_mag_f16_sign_f64:
; GCN-DAG: buffer_load_ushort v[[MAG:[0-9]+]]
; GCN-DAG: buffer_load_dwordx2 v{{\[}}[[SIGN_LO:[0-9]+]]:[[SIGN_HI:[0-9]+]]{{\]}}
; GCN-DAG: {{buffer|flat}}_load_ushort v[[MAG:[0-9]+]]
; GCN-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[SIGN_LO:[0-9]+]]:[[SIGN_HI:[0-9]+]]{{\]}}
; SI-DAG: s_brev_b32 s[[CONST:[0-9]+]], -2
; SI-DAG: v_cvt_f32_f16_e32 v[[MAG_F32:[0-9]+]], v[[MAG]]
; SI: v_bfi_b32 v[[OUT_F32:[0-9]+]], s[[CONST]], v[[MAG_F32]], v[[SIGN_HI]]
@ -166,8 +183,11 @@ define amdgpu_kernel void @test_copysign_out_f16_mag_f16_sign_f64(
half addrspace(1)* %arg_mag,
double addrspace(1)* %arg_sign) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%arg_mag_gep = getelementptr half, half addrspace(1)* %arg_mag, i32 %tid
%mag = load half, half addrspace(1)* %arg_mag
%sign = load double, double addrspace(1)* %arg_sign
%arg_sign_gep = getelementptr double, double addrspace(1)* %arg_sign, i32 %tid
%sign = load double, double addrspace(1)* %arg_sign_gep
%sign.trunc = fptrunc double %sign to half
%out = call half @llvm.copysign.f16(half %mag, half %sign.trunc)
store half %out, half addrspace(1)* %arg_out
@ -175,8 +195,8 @@ entry:
}
; GCN-LABEL: {{^}}test_copysign_out_f16_mag_f32_sign_f16:
; GCN-DAG: buffer_load_dword v[[MAG:[0-9]+]]
; GCN-DAG: buffer_load_ushort v[[SIGN:[0-9]+]]
; GCN-DAG: {{buffer|flat}}_load_dword v[[MAG:[0-9]+]]
; GCN-DAG: {{buffer|flat}}_load_ushort v[[SIGN:[0-9]+]]
; SI-DAG: s_brev_b32 s[[CONST:[0-9]+]], -2
; SI-DAG: v_cvt_f16_f32_e32 v[[MAG_TRUNC:[0-9]+]], v[[MAG]]
; SI-DAG: v_cvt_f32_f16_e32 v[[SIGN_F32:[0-9]+]], v[[SIGN]]
@ -193,9 +213,12 @@ define amdgpu_kernel void @test_copysign_out_f16_mag_f32_sign_f16(
float addrspace(1)* %arg_mag,
half addrspace(1)* %arg_sign) {
entry:
%mag = load float, float addrspace(1)* %arg_mag
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%arg_mag_gep = getelementptr float, float addrspace(1)* %arg_mag, i32 %tid
%mag = load float, float addrspace(1)* %arg_mag_gep
%mag.trunc = fptrunc float %mag to half
%sign = load half, half addrspace(1)* %arg_sign
%arg_sign_gep = getelementptr half, half addrspace(1)* %arg_sign, i32 %tid
%sign = load half, half addrspace(1)* %arg_sign_gep
%out = call half @llvm.copysign.f16(half %mag.trunc, half %sign)
store half %out, half addrspace(1)* %arg_out
ret void

View File

@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast -enable-no-infs-fp-math -enable-unsafe-fp-math -mattr=+fp32-denormals < %s | FileCheck -check-prefix=SI-FMA -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast -enable-no-infs-fp-math -enable-unsafe-fp-math -mattr=+fp32-denormals < %s | FileCheck -check-prefix=SI-FMA -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
; Note: The SI-FMA conversions of type x * (y + 1) --> x * y + x would be
; beneficial even without fp32 denormals, but they do require no-infs-fp-math

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
declare double @llvm.fma.f64(double, double, double) nounwind readnone
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare float @llvm.fma.f32(float, float, float) nounwind readnone
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; GCN-LABEL: {{^}}fmul_f16
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s
; FUNC-LABEL: {{^}}fmul_f64:
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}

View File

@ -1,12 +1,12 @@
; RUN: llc -verify-machineinstrs -mcpu=tahiti -mattr=-fp32-denormals,+fast-fmaf -fp-contract=on < %s | FileCheck -check-prefixes=GCN,GCN-FLUSH-STRICT,GCN-FLUSH,SI-FLUSH,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-STRICT,SI %s
; RUN: llc -verify-machineinstrs -mcpu=tahiti -mattr=+fp32-denormals,+fast-fmaf -fp-contract=on < %s | FileCheck -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,SI-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-STRICT,SI %s
; RUN: llc -verify-machineinstrs -mcpu=verde -mattr=-fp32-denormals,-fast-fmaf -fp-contract=on < %s | FileCheck -check-prefixes=GCN,GCN-FLUSH-STRICT,GCN-FLUSH,SI-FLUSH,GCN-FLUSH-SLOWFMA,GCN-FLUSH-SLOWFMA-STRICT,SI %s
; RUN: llc -verify-machineinstrs -mcpu=verde -mattr=+fp32-denormals,-fast-fmaf -fp-contract=on < %s | FileCheck -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,SI-DENORM,GCN-DENORM-SLOWFMA,GCN-DENORM-SLOWFMA-STRICT,SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -mattr=-fp32-denormals,+fast-fmaf -fp-contract=on < %s | FileCheck -check-prefixes=GCN,GCN-FLUSH-STRICT,GCN-FLUSH,SI-FLUSH,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-STRICT,SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -mattr=+fp32-denormals,+fast-fmaf -fp-contract=on < %s | FileCheck -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,SI-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-STRICT,SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -mattr=-fp32-denormals,-fast-fmaf -fp-contract=on < %s | FileCheck -check-prefixes=GCN,GCN-FLUSH-STRICT,GCN-FLUSH,SI-FLUSH,GCN-FLUSH-SLOWFMA,GCN-FLUSH-SLOWFMA-STRICT,SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -mattr=+fp32-denormals,-fast-fmaf -fp-contract=on < %s | FileCheck -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,SI-DENORM,GCN-DENORM-SLOWFMA,GCN-DENORM-SLOWFMA-STRICT,SI %s
; RUN: llc -verify-machineinstrs -mcpu=tahiti -mattr=-fp32-denormals,+fast-fmaf -fp-contract=fast < %s | FileCheck -check-prefixes=GCN,GCN-FLUSH-CONTRACT,GCN-FLUSH,SI-FLUSH,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-CONTRACT,SI %s
; RUN: llc -verify-machineinstrs -mcpu=tahiti -mattr=+fp32-denormals,+fast-fmaf -fp-contract=fast < %s | FileCheck -check-prefixes=GCN,GCN-DENORM-CONTRACT,GCN-DENORM,SI-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-CONTRACT,SI %s
; RUN: llc -verify-machineinstrs -mcpu=verde -mattr=-fp32-denormals,-fast-fmaf -fp-contract=fast < %s | FileCheck -check-prefixes=GCN,GCN-FLUSH-CONTRACT,GCN-FLUSH,SI-FLUSH,GCN-FLUSH-SLOWFMA,GCN-FLUSH-SLOWFMA-CONTRACT,SI %s
; RUN: llc -verify-machineinstrs -mcpu=verde -mattr=+fp32-denormals,-fast-fmaf -fp-contract=fast < %s | FileCheck -check-prefixes=GCN,GCN-DENORM-CONTRACT,GCN-DENORM,SI-DENORM,GCN-DENORM-SLOWFMA,GCN-DENORM-SLOWFMA-CONTRACT,SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -mattr=-fp32-denormals,+fast-fmaf -fp-contract=fast < %s | FileCheck -check-prefixes=GCN,GCN-FLUSH-CONTRACT,GCN-FLUSH,SI-FLUSH,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-CONTRACT,SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -mattr=+fp32-denormals,+fast-fmaf -fp-contract=fast < %s | FileCheck -check-prefixes=GCN,GCN-DENORM-CONTRACT,GCN-DENORM,SI-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-CONTRACT,SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -mattr=-fp32-denormals,-fast-fmaf -fp-contract=fast < %s | FileCheck -check-prefixes=GCN,GCN-FLUSH-CONTRACT,GCN-FLUSH,SI-FLUSH,GCN-FLUSH-SLOWFMA,GCN-FLUSH-SLOWFMA-CONTRACT,SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -mattr=+fp32-denormals,-fast-fmaf -fp-contract=fast < %s | FileCheck -check-prefixes=GCN,GCN-DENORM-CONTRACT,GCN-DENORM,SI-DENORM,GCN-DENORM-SLOWFMA,GCN-DENORM-SLOWFMA-CONTRACT,SI %s
; Test all permutations of: fp32 denormals, fast fp contract, fp contract enabled for fmuladd, fmaf fast/slow.

View File

@ -1,9 +1,9 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICTSI %s
; RUN: llc -march=amdgcn -mcpu=verde -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,SI %s
; RUN: llc -march=amdgcn -mcpu=tahiti -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,SI %s
; RUN: llc -march=amdgcn -mcpu=verde -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICTSI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI %s
; GCN-LABEL: {{^}}fmuladd_f64:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}

View File

@ -1,12 +1,12 @@
; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>) #1

View File

@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}fneg_fabs_fadd_f32:
; SI-NOT: and

View File

@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=kaveri -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=CIVI -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CIVI -check-prefix=GCN -check-prefix=GFX89 %s
; RUN: llc -march=amdgcn -mcpu=gfx901 -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN -check-prefix=GFX89 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=kaveri -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=CIVI -check-prefix=GCN %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CIVI -check-prefix=GCN -check-prefix=GFX89 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN -check-prefix=GFX89 %s
; FIXME: Should be able to do scalar op
; GCN-LABEL: {{^}}s_fneg_f16:

View File

@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone

View File

@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s
; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s
; GCN-LABEL: {{^}}fpext_f16_to_f32
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; GCN-LABEL: {{^}}fptosi_f16_to_i16
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; GCN-LABEL: {{^}}fptoui_f16_to_i16
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]

View File

@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=SIVI %s
; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=SIVI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
; GCN-LABEL: {{^}}fptrunc_f32_to_f16:
; GCN: buffer_load_dword v[[A_F32:[0-9]+]]

View File

@ -1,9 +1,9 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-UNSAFE -check-prefix=SI-UNSAFE -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-UNSAFE -check-prefix=VI-UNSAFE -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-UNSAFE -check-prefix=SI-UNSAFE -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-UNSAFE -check-prefix=VI-UNSAFE -check-prefix=FUNC %s
declare double @llvm.fabs.f64(double) #0
declare double @llvm.floor.f64(double) #0

View File

@ -1,8 +1,8 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-SAFE -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-SAFE -check-prefix=GCN -check-prefix=CI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-SAFE -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN-UNSAFE -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN-UNSAFE -check-prefix=GCN %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-SAFE -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-SAFE -check-prefix=GCN -check-prefix=CI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-SAFE -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN-UNSAFE -check-prefix=GCN %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN-UNSAFE -check-prefix=GCN %s
declare float @llvm.fabs.f32(float) #0
declare float @llvm.floor.f32(float) #0

View File

@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}frem_f32:
; GCN-DAG: buffer_load_dword [[X:v[0-9]+]], {{.*$}}

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}v_safe_fsqrt_f64:
; GCN: v_sqrt_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}

View File

@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
; Run with unsafe-fp-math to make sure nothing tries to turn this into 1 / rsqrt(x)

View File

@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=VI -check-prefix=SIVI %s
; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=VI -check-prefix=SIVI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s
; GCN-LABEL: {{^}}fsub_f16:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]

View File

@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}v_fsub_f32:
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
declare double @llvm.fabs.f64(double) #0

View File

@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
declare double @llvm.trunc.f64(double) nounwind readnone
declare <2 x double> @llvm.trunc.v2f64(<2 x double>) nounwind readnone

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; XUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FIXME: cypress is broken because the bigger testcases spill and it's not implemented

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; half args should be promoted to float for SI and lower.

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; Use a 64-bit value with lo bits that can be represented as an inline constant
; GCN-LABEL: {{^}}i64_imm_inline_lo:

View File

@ -1,6 +1,6 @@
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
; FIXME: Merge into imm.ll
; GCN-LABEL: {{^}}store_inline_imm_neg_0.0_v2i16:

View File

@ -1,7 +1,7 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=MOVREL %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=MOVREL %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-vgpr-index-mode -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=IDXMODE %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=IDXMODE %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=MOVREL %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=MOVREL %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-vgpr-index-mode -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=IDXMODE %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=IDXMODE %s
; Tests for indirect addressing on SI, which is implemented using dynamic
; indexing of vectors.

View File

@ -1,4 +1,4 @@
; RUN: llc -march=amdgcn -amdgpu-load-store-vectorizer=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -amdgpu-load-store-vectorizer=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; GatherAllAliases gives up on trying to analyze cases where the
; pointer may have been loaded from an aliased store, so make sure

View File

@ -1,4 +1,4 @@
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
declare half @llvm.fabs.f16(half %a)
declare i1 @llvm.amdgcn.class.f16(half %a, i32 %b)

View File

@ -1,4 +1,4 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
declare i1 @llvm.amdgcn.class.f32(float, i32) #1
declare i1 @llvm.amdgcn.class.f64(double, i32) #1

View File

@ -1,4 +1,4 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN -check-prefix=SI %s
; XUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN -check-prefix=VI %s
; FIXME: Enable for VI.

View File

@ -1,4 +1,4 @@
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
declare half @llvm.amdgcn.ldexp.f16(half %a, i32 %b)

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; GCN-LABEL: {{^}}bfe_i32_arg_arg_arg:
; GCN: v_bfe_i32

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
declare i32 @llvm.amdgcn.sffbh.i32(i32) #1

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
declare double @llvm.amdgcn.trig.preop.f64(double, i32) nounwind readnone

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; GCN-LABEL: {{^}}bfe_u32_arg_arg_arg:
; GCN: v_bfe_u32

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
declare half @llvm.ceil.f16(half %a)
declare <2 x half> @llvm.ceil.v2f16(<2 x half> %a)

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
declare half @llvm.cos.f16(half %a)
declare <2 x half> @llvm.cos.v2f16(<2 x half> %a)

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
declare half @llvm.exp2.f16(half %a)
declare <2 x half> @llvm.exp2.v2f16(<2 x half> %a)

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
declare half @llvm.floor.f16(half %a)
declare <2 x half> @llvm.floor.v2f16(<2 x half> %a)

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
declare half @llvm.fma.f16(half %a, half %b, half %c)
declare <2 x half> @llvm.fma.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)

View File

@ -1,7 +1,7 @@
; RUN: llc -march=amdgcn -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s
; RUN: llc -march=amdgcn -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s
declare half @llvm.fmuladd.f16(half %a, half %b, half %c)
declare <2 x half> @llvm.fmuladd.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
declare half @llvm.log2.f16(half %a)
declare <2 x half> @llvm.log2.v2f16(<2 x half> %a)

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
declare half @llvm.maxnum.f16(half %a, half %b)
declare <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> %b)

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
declare half @llvm.minnum.f16(half %a, half %b)
declare <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b)

View File

@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SIVI -check-prefix=VI -check-prefix=GFX89 %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SIVI -check-prefix=VI -check-prefix=GFX89 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s
declare half @llvm.rint.f16(half %a)
declare <2 x half> @llvm.rint.v2f16(<2 x half> %a)

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
declare half @llvm.sin.f16(half %a)
declare <2 x half> @llvm.sin.v2f16(<2 x half> %a)

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
declare half @llvm.sqrt.f16(half %a)
declare <2 x half> @llvm.sqrt.v2f16(<2 x half> %a)

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
declare half @llvm.trunc.f16(half %a)
declare <2 x half> @llvm.trunc.v2f16(<2 x half> %a)

View File

@ -1,9 +1,9 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}global_load_f32:
; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}

View File

@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}global_load_f64:
; GCN-NOHSA: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]

View File

@ -1,8 +1,8 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-NOHSA,GCN-NOHSA-SI,FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-HSA,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-NOHSA,GCN-NOHSA-VI,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=EGCM -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=EGCM -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-NOHSA,GCN-NOHSA-SI,FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-HSA,FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-NOHSA,GCN-NOHSA-VI,FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=EGCM -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=EGCM -check-prefix=FUNC %s
; FIXME: r600 is broken because the bigger testcases spill and it's not implemented

View File

@ -1,7 +1,7 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}global_load_i32:

View File

@ -1,9 +1,9 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}global_load_i64:
; GCN-NOHSA: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]

View File

@ -1,8 +1,8 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-NOHSA,SI,FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-HSA,SI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-NOHSA,VI,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-NOHSA,SI,FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-HSA,SI,FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-NOHSA,VI,FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}global_load_i8:

View File

@ -1,8 +1,8 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NOHSA -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=CI-HSA -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NOHSA -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=R600 -check-prefix=CM -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NOHSA -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=CI-HSA -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NOHSA -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=R600 -check-prefix=CM -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}load_i24:
; SI: {{flat|buffer}}_load_ubyte

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}v_test_imax_sge_i32:

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s
; This test is mostly to test DAG store merging, so disable the vectorizer.
; Run with devices with different unaligned load restrictions.

View File

@ -1,4 +1,4 @@
; RUN: llc -march=amdgcn -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s
declare i32 @llvm.amdgcn.workitem.id.x() readnone

View File

@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG %s -check-prefix=FUNC
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG %s -check-prefix=FUNC
; mul24 and mad24 are affected

View File

@ -1,4 +1,4 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone

View File

@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}or_v2i32:

View File

@ -1,4 +1,4 @@
; RUN: llc -march=amdgcn -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) #0
declare void @llvm.invariant.end.p0i8({}*, i64, i8* nocapture) #0

View File

@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; GCN-LABEL: {{^}}reduce_i64_load_align_4_width_to_i32:
; GCN: buffer_load_dword [[VAL:v[0-9]+]]

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn < %s | FileCheck -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: {{^}}no_reorder_v2f64_global_load_store:
; SI: buffer_load_dwordx4

Some files were not shown because too many files have changed in this diff Show More