1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

AMDGPU: Un-xfail and add tests

Un-XFAIL a few tests, and add a few more that I had lying around
in my tree. They all seem to work now, and I don't see existing tests
that cover quite the same things.

llvm-svn: 273655
This commit is contained in:
Matt Arsenault 2016-06-24 06:58:01 +00:00
parent c72c584a96
commit fe796a3979
8 changed files with 386 additions and 35 deletions

View File

@ -0,0 +1,121 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -verify-machineinstrs -O0 < %s
; Loop that is entered only when %n != -1 and then never exits: for.body ends
; in an unconditional branch back to itself. The checks expect a basic-block
; label, a ds_read_b32/ds_write_b32 pair, an s_branch back to that same label,
; and an s_endpgm afterwards.
; The label pattern is BB<digits>_<digits>. The '+' quantifier has to sit
; outside the character class; written as [0-9+] it matches exactly one
; character (a digit or a literal '+') and fails as soon as the first number
; has more than one digit.
; GCN-LABEL: {{^}}test_loop:
; GCN: [[LABEL:BB[0-9]+_[0-9]+]]:
; GCN: ds_read_b32
; GCN: ds_write_b32
; GCN: s_branch [[LABEL]]
; GCN: s_endpgm
define void @test_loop(float addrspace(3)* %ptr, i32 %n) nounwind {
entry:
%cmp = icmp eq i32 %n, -1
br i1 %cmp, label %for.exit, label %for.body
for.exit:
ret void
for.body:
; Induction variable starts at 0 and is incremented by 1 on every iteration.
%indvar = phi i32 [ %inc, %for.body ], [ 0, %entry ]
; The accessed element is 32 floats past the induction variable.
%tmp = add i32 %indvar, 32
%arrayidx = getelementptr float, float addrspace(3)* %ptr, i32 %tmp
%vecload = load float, float addrspace(3)* %arrayidx, align 4
%add = fadd float %vecload, 1.0
store float %add, float addrspace(3)* %arrayidx, align 8
%inc = add i32 %indvar, 1
; No exit edge: the loop always branches back to its own header.
br label %for.body
}
; Loop whose back-edge condition is the constant true, so for.exit is never
; reached from for.body. The checks expect a basic-block label, a
; ds_read_b32/ds_write_b32 pair, and an s_branch back to that same label.
; The function label is matched with the anchored "{{^}}name:" form so that it
; pins the start of this function's assembly rather than any later mention of
; the symbol. The block-label pattern is BB<digits>_<digits>, with the '+'
; quantifier outside the character class so multi-digit numbers match.
; GCN-LABEL: {{^}}loop_const_true:
; GCN: [[LABEL:BB[0-9]+_[0-9]+]]:
; GCN: ds_read_b32
; GCN: ds_write_b32
; GCN: s_branch [[LABEL]]
define void @loop_const_true(float addrspace(3)* %ptr, i32 %n) nounwind {
entry:
br label %for.body
for.exit:
ret void
for.body:
; Induction variable starts at 0 and is incremented by 1 on every iteration.
%indvar = phi i32 [ %inc, %for.body ], [ 0, %entry ]
; The accessed element is 32 floats past the induction variable.
%tmp = add i32 %indvar, 32
%arrayidx = getelementptr float, float addrspace(3)* %ptr, i32 %tmp
%vecload = load float, float addrspace(3)* %arrayidx, align 4
%add = fadd float %vecload, 1.0
store float %add, float addrspace(3)* %arrayidx, align 8
%inc = add i32 %indvar, 1
; Constant-true condition: the back edge is always taken.
br i1 true, label %for.body, label %for.exit
}
; Loop-shaped CFG whose back-edge condition is the constant false, so for.body
; runs once and then leaves through for.exit. The checks below require that no
; s_branch appears before the s_endpgm of this function.
; GCN-LABEL: {{^}}loop_const_false:
; GCN-NOT: s_branch
; GCN: s_endpgm
define void @loop_const_false(float addrspace(3)* %ptr, i32 %n) nounwind {
entry:
br label %for.body
for.exit:
ret void
; XXX - Should there be an S_ENDPGM?
for.body:
; The phi still lists for.body as a predecessor even though the back edge is
; never taken.
%indvar = phi i32 [ %inc, %for.body ], [ 0, %entry ]
%tmp = add i32 %indvar, 32
%arrayidx = getelementptr float, float addrspace(3)* %ptr, i32 %tmp
%vecload = load float, float addrspace(3)* %arrayidx, align 4
%add = fadd float %vecload, 1.0
store float %add, float addrspace(3)* %arrayidx, align 8
%inc = add i32 %indvar, 1
; Constant-false condition: control always continues to for.exit.
br i1 false, label %for.body, label %for.exit
}
; Loop-shaped CFG whose back-edge condition is undef. The checks below require
; that no s_branch appears before the s_endpgm of this function, i.e. the
; expected output contains no loop-back branch.
; GCN-LABEL: {{^}}loop_const_undef:
; GCN-NOT: s_branch
; GCN: s_endpgm
define void @loop_const_undef(float addrspace(3)* %ptr, i32 %n) nounwind {
entry:
br label %for.body
for.exit:
ret void
; XXX - Should there be an s_endpgm?
for.body:
%indvar = phi i32 [ %inc, %for.body ], [ 0, %entry ]
%tmp = add i32 %indvar, 32
%arrayidx = getelementptr float, float addrspace(3)* %ptr, i32 %tmp
%vecload = load float, float addrspace(3)* %arrayidx, align 4
%add = fadd float %vecload, 1.0
store float %add, float addrspace(3)* %arrayidx, align 8
%inc = add i32 %indvar, 1
; Undefined condition: either successor may be chosen by the compiler.
br i1 undef, label %for.body, label %for.exit
}
; Loop whose back-edge condition is the i1 function argument %cond. The checks
; below expect the value to be masked with 1, compared for equality with 1 into
; vcc, and combined with exec; the loop block must then end in an
; s_cbranch_vccnz back to its own label, immediately followed by the BB#2
; comment line and s_endpgm.
; GCN-LABEL: {{^}}loop_arg_0:
; GCN: v_and_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; GCN: v_cmp_eq_i32_e32 vcc, 1,
; GCN: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, exec, vcc
; GCN: [[LOOPBB:BB[0-9]+_[0-9]+]]
; GCN: s_cbranch_vccnz [[LOOPBB]]
; GCN-NEXT: ; BB#2
; GCN-NEXT: s_endpgm
define void @loop_arg_0(float addrspace(3)* %ptr, i32 %n, i1 %cond) nounwind {
entry:
br label %for.body
for.exit:
ret void
for.body:
%indvar = phi i32 [ %inc, %for.body ], [ 0, %entry ]
%tmp = add i32 %indvar, 32
%arrayidx = getelementptr float, float addrspace(3)* %ptr, i32 %tmp
%vecload = load float, float addrspace(3)* %arrayidx, align 4
%add = fadd float %vecload, 1.0
store float %add, float addrspace(3)* %arrayidx, align 8
%inc = add i32 %indvar, 1
; %cond does not change inside the loop, so the same decision is made on every
; iteration.
br i1 %cond, label %for.body, label %for.exit
}

View File

@ -0,0 +1,126 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; A <4 x i32> is assembled from four volatile scalar loads, stored whole to
; %out0 and %out1, and then each of its lanes is extracted and stored
; separately to %out2. The checks below expect four dword loads, two dwordx4
; stores and four dword stores, in that order.
; GCN-LABEL: {{^}}store_build_vector_multiple_uses_v4i32:
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
define void @store_build_vector_multiple_uses_v4i32(<4 x i32> addrspace(1)* noalias %out0,
<4 x i32> addrspace(1)* noalias %out1,
i32 addrspace(1)* noalias %out2,
i32 addrspace(1)* %in) {
; The loads are volatile, so all four are kept even though they read the same
; address.
%elt0 = load volatile i32, i32 addrspace(1)* %in
%elt1 = load volatile i32, i32 addrspace(1)* %in
%elt2 = load volatile i32, i32 addrspace(1)* %in
%elt3 = load volatile i32, i32 addrspace(1)* %in
; Build the vector one lane at a time.
%vec0 = insertelement <4 x i32> undef, i32 %elt0, i32 0
%vec1 = insertelement <4 x i32> %vec0, i32 %elt1, i32 1
%vec2 = insertelement <4 x i32> %vec1, i32 %elt2, i32 2
%vec3 = insertelement <4 x i32> %vec2, i32 %elt3, i32 3
; First kind of use: the whole vector is stored twice.
store <4 x i32> %vec3, <4 x i32> addrspace(1)* %out0
store <4 x i32> %vec3, <4 x i32> addrspace(1)* %out1
; Second kind of use: every lane is extracted again and stored on its own.
%extract0 = extractelement <4 x i32> %vec3, i32 0
%extract1 = extractelement <4 x i32> %vec3, i32 1
%extract2 = extractelement <4 x i32> %vec3, i32 2
%extract3 = extractelement <4 x i32> %vec3, i32 3
store volatile i32 %extract0, i32 addrspace(1)* %out2
store volatile i32 %extract1, i32 addrspace(1)* %out2
store volatile i32 %extract2, i32 addrspace(1)* %out2
store volatile i32 %extract3, i32 addrspace(1)* %out2
ret void
}
; A <4 x i32> is assembled from four volatile scalar loads and stored whole to
; %out0, while each lane is also extracted, run through a different integer
; operation, and stored separately to %out2. %out1 is not used. The checks
; below expect four dword loads, one dwordx4 store and four dword stores.
; GCN-LABEL: {{^}}store_build_vector_multiple_extract_uses_v4i32:
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
define void @store_build_vector_multiple_extract_uses_v4i32(<4 x i32> addrspace(1)* noalias %out0,
<4 x i32> addrspace(1)* noalias %out1,
i32 addrspace(1)* noalias %out2,
i32 addrspace(1)* %in) {
; The loads are volatile, so all four are kept even though they read the same
; address.
%elt0 = load volatile i32, i32 addrspace(1)* %in
%elt1 = load volatile i32, i32 addrspace(1)* %in
%elt2 = load volatile i32, i32 addrspace(1)* %in
%elt3 = load volatile i32, i32 addrspace(1)* %in
; Build the vector one lane at a time.
%vec0 = insertelement <4 x i32> undef, i32 %elt0, i32 0
%vec1 = insertelement <4 x i32> %vec0, i32 %elt1, i32 1
%vec2 = insertelement <4 x i32> %vec1, i32 %elt2, i32 2
%vec3 = insertelement <4 x i32> %vec2, i32 %elt3, i32 3
%extract0 = extractelement <4 x i32> %vec3, i32 0
%extract1 = extractelement <4 x i32> %vec3, i32 1
%extract2 = extractelement <4 x i32> %vec3, i32 2
%extract3 = extractelement <4 x i32> %vec3, i32 3
; Each extracted lane gets a distinct operation so the four scalar uses cannot
; be merged with one another.
%op0 = add i32 %extract0, 3
%op1 = sub i32 %extract1, 9
%op2 = xor i32 %extract2, 1231412
; The mask was previously written as 258233412312, which does not fit in an
; i32. 535374552 is that value reduced modulo 2^32, i.e. the constant a
; truncating parser would have produced, spelled so that it is in range.
%op3 = and i32 %extract3, 535374552
store <4 x i32> %vec3, <4 x i32> addrspace(1)* %out0
store volatile i32 %op0, i32 addrspace(1)* %out2
store volatile i32 %op1, i32 addrspace(1)* %out2
store volatile i32 %op2, i32 addrspace(1)* %out2
store volatile i32 %op3, i32 addrspace(1)* %out2
ret void
}
; A <4 x i32> is assembled from four volatile scalar loads, bitcast to
; <2 x i64>, stored whole to %out0, and then both i64 lanes are extracted and
; stored separately to %out2. %out1 is not used. The checks below expect four
; dword loads, one dwordx4 store and two dwordx2 stores.
; GCN-LABEL: {{^}}store_build_vector_multiple_uses_v4i32_bitcast_to_v2i64:
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx2
; GCN: buffer_store_dwordx2
define void @store_build_vector_multiple_uses_v4i32_bitcast_to_v2i64(<2 x i64> addrspace(1)* noalias %out0,
<4 x i32> addrspace(1)* noalias %out1,
i64 addrspace(1)* noalias %out2,
i32 addrspace(1)* %in) {
; The loads are volatile, so all four are kept even though they read the same
; address.
%elt0 = load volatile i32, i32 addrspace(1)* %in
%elt1 = load volatile i32, i32 addrspace(1)* %in
%elt2 = load volatile i32, i32 addrspace(1)* %in
%elt3 = load volatile i32, i32 addrspace(1)* %in
; Build the vector one lane at a time.
%vec0 = insertelement <4 x i32> undef, i32 %elt0, i32 0
%vec1 = insertelement <4 x i32> %vec0, i32 %elt1, i32 1
%vec2 = insertelement <4 x i32> %vec1, i32 %elt2, i32 2
%vec3 = insertelement <4 x i32> %vec2, i32 %elt3, i32 3
; Reinterpret the same 128 bits as two 64-bit lanes; all later uses go through
; the bitcast value.
%bc.vec3 = bitcast <4 x i32> %vec3 to <2 x i64>
store <2 x i64> %bc.vec3, <2 x i64> addrspace(1)* %out0
%extract0 = extractelement <2 x i64> %bc.vec3, i32 0
%extract1 = extractelement <2 x i64> %bc.vec3, i32 1
store volatile i64 %extract0, i64 addrspace(1)* %out2
store volatile i64 %extract1, i64 addrspace(1)* %out2
ret void
}

View File

@ -0,0 +1,36 @@
; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; If the workgroup id range is restricted, we should be able to use
; mad24 for the usual indexing pattern.
declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
; Computes workgroup.id.x * workgroup.size.x + workitem.id.x and stores the
; result to %out. The size operand is masked to its low 16 bits and the
; workgroup id call carries !range !2, so the checks below expect the
; multiply-add to be selected as v_mad_u32_u24. The expected operands include
; the fixed registers s8 and v0.
; GCN-LABEL: {{^}}get_global_id_0:
; GCN: s_and_b32 [[WGSIZEX:s[0-9]+]], {{s[0-9]+}}, 0xffff
; GCN: v_mov_b32_e32 [[VWGSIZEX:v[0-9]+]], [[WGSIZEX]]
; GCN: v_mad_u32_u24 v{{[0-9]+}}, [[VWGSIZEX]], s8, v0
define void @get_global_id_0(i32 addrspace(1)* %out) #1 {
%dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
%cast.dispatch.ptr = bitcast i8 addrspace(2)* %dispatch.ptr to i32 addrspace(2)*
; Read the i32 at index 1 behind the dispatch pointer.
; NOTE(review): the value name suggests this dword packs the x and y workgroup
; sizes - confirm against the dispatch packet layout.
%gep = getelementptr inbounds i32, i32 addrspace(2)* %cast.dispatch.ptr, i64 1
%workgroup.size.xy = load i32, i32 addrspace(2)* %gep, align 4, !invariant.load !0
; Keep only the low 16 bits of the loaded dword.
%workgroup.size.x = and i32 %workgroup.size.xy, 65535
%workitem.id.x = call i32 @llvm.amdgcn.workitem.id.x(), !range !1
%workgroup.id.x = call i32 @llvm.amdgcn.workgroup.id.x(), !range !2
%mul = mul i32 %workgroup.id.x, %workgroup.size.x
%add = add i32 %mul, %workitem.id.x
store i32 %add, i32 addrspace(1)* %out, align 4
ret void
}
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
!0 = !{}
!1 = !{i32 0, i32 1024}
!2 = !{i32 0, i32 16777216}

View File

@ -201,3 +201,15 @@ entry:
store i32 %mask, i32 addrspace(1)* %out
ret void
}
; Loads a <2 x i32>, bitcasts it to i64, and keeps only the upper 32 bits
; (logical shift right by 32, then truncate) before storing them to %out.
; The checks below expect a plain dword load and a plain dword store; the
; trailing " v" in each pattern keeps it from matching a dwordx2 access.
; FUNC-LABEL: {{^}}extract_hi_i64_bitcast_v2i32:
; SI: buffer_load_dword v
; SI: buffer_store_dword v
define void @extract_hi_i64_bitcast_v2i32(i32 addrspace(1)* %out, <2 x i32> addrspace(1)* %in) nounwind {
%ld = load <2 x i32>, <2 x i32> addrspace(1)* %in
%bc = bitcast <2 x i32> %ld to i64
%hi = lshr i64 %bc, 32
%trunc = trunc i64 %hi to i32
store i32 %trunc, i32 addrspace(1)* %out
ret void
}

View File

@ -1,25 +0,0 @@
; REQUIRES: asserts
; XFAIL: *
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs-asm-verbose=false < %s | FileCheck %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs-asm-verbose=false < %s | FileCheck %s
; Switch on %x in which every destination - both listed cases and the default -
; ends in unreachable. sw.epilog holds the only ret but has no predecessors.
; %g and %l are not used in the body.
define void @test(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind {
; CHECK-LABEL: {{^}}test:
entry:
switch i32 %x, label %sw.default [
i32 0, label %sw.bb
i32 60, label %sw.bb
]
sw.bb:
unreachable
sw.default:
unreachable
; No branch in this function targets sw.epilog.
sw.epilog:
ret void
}

View File

@ -64,6 +64,87 @@ exit:
ret void
}
; Switch on %x in which every destination - both listed cases and the default -
; ends in unreachable. sw.epilog holds the only ret but has no predecessors.
; The checks below pin the current output: no s_endpgm may appear between the
; function label and the .Lfunc_end2 end label. The "2" in that label is
; hard-coded in the check.
; FIXME: should emit s_endpgm
; CHECK-LABEL: {{^}}switch_unreachable:
; CHECK-NOT: s_endpgm
; CHECK: .Lfunc_end2
define void @switch_unreachable(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind {
centry:
switch i32 %x, label %sw.default [
i32 0, label %sw.bb
i32 60, label %sw.bb
]
sw.bb:
unreachable
sw.default:
unreachable
; No branch in this function targets sw.epilog.
sw.epilog:
ret void
}
declare float @llvm.fabs.f32(float) nounwind readnone
; This broke the old AMDIL cfg structurizer
; The CFG has an outer loop (while.cond.outer), an inner loop (while.cond), a
; second loop through for.cond / if.end.2, several early exits to return, and
; an infinite self-loop (self.loop). The checks below expect two s_cmp_gt_i32
; compares, each immediately followed by a conditional scc branch to one shared
; label, then a block that branches unconditionally to itself, and finally the
; shared label followed by s_endpgm.
; %c2, %x and %y are not used in the body.
; FUNC-LABEL: {{^}}loop_land_info_assert:
; SI: s_cmp_gt_i32
; SI-NEXT: s_cbranch_scc0 [[ENDPGM:BB[0-9]+_[0-9]+]]
; SI: s_cmp_gt_i32
; SI-NEXT: s_cbranch_scc1 [[ENDPGM]]
; SI: [[INFLOOP:BB[0-9]+_[0-9]+]]
; SI: s_branch [[INFLOOP]]
; SI: [[ENDPGM]]:
; SI: s_endpgm
define void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32 %c3, i32 %x, i32 %y, i1 %arg) nounwind {
entry:
%cmp = icmp sgt i32 %c0, 0
br label %while.cond.outer
while.cond.outer:
; Load through an undef pointer; the loaded value is only used in if.end.
%tmp = load float, float addrspace(1)* undef
br label %while.cond
while.cond:
; %c1 is a function argument, so this compare yields the same result on every
; iteration.
%cmp1 = icmp slt i32 %c1, 4
br i1 %cmp1, label %convex.exit, label %for.cond
convex.exit:
%or = or i1 %cmp, %cmp1
br i1 %or, label %return, label %if.end
if.end:
%tmp3 = call float @llvm.fabs.f32(float %tmp) nounwind readnone
%cmp2 = fcmp olt float %tmp3, 0x3E80000000000000
br i1 %cmp2, label %if.else, label %while.cond.outer
if.else:
store volatile i32 3, i32 addrspace(1)* undef, align 4
br label %while.cond
for.cond:
%cmp3 = icmp slt i32 %c3, 1000
br i1 %cmp3, label %for.body, label %return
for.body:
; Branches on the same %cmp3 that was just tested to enter this block.
br i1 %cmp3, label %self.loop, label %if.end.2
if.end.2:
%or.cond2 = or i1 %cmp3, %arg
br i1 %or.cond2, label %return, label %for.cond
self.loop:
; Infinite loop: the block's only successor is itself.
br label %self.loop
return:
ret void
}
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
attributes #0 = { nounwind readnone }

View File

@ -1,4 +1,4 @@
; RUN: llc -march=amdgcn -mcpu=kaveri < %s | FileCheck %s
; RUN: llc -march=amdgcn -mcpu=kaveri -verify-machineinstrs < %s | FileCheck %s
; CHECK-LABEL: {{^}}test:
; CHECK s_and_saveexec_b64
@ -6,7 +6,7 @@
; CHECK s_or_b64 exec, exec
; CHECK s_andn2_b64 exec, exec
; CHECK s_cbranch_execnz
define spir_kernel void @test(i32 %arg, i32 %arg1, i32 addrspace(1)* nocapture %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6) {
define void @test(i32 %arg, i32 %arg1) {
bb:
%tmp = icmp ne i32 %arg, 0
%tmp7 = icmp ne i32 %arg1, 0

View File

@ -1,8 +1,6 @@
; REQUIRES: asserts
; XFAIL: *
; RUN: llc -O0 -verify-machineinstrs -asm-verbose=0 -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=COMMON %s
; RUN: llc -O0 -verify-machineinstrs -asm-verbose=0 -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=COMMON %s
; RUN: llc -O0 -verify-machineinstrs -asm-verbose=0 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=COMMON %s
; RUN: llc -O0 -verify-machineinstrs -asm-verbose=0 -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=COMMON %s
; XUN: llc -O0 -verify-machineinstrs -asm-verbose=0 -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=COMMON %s
; SI hits an assertion at -O0, evergreen hits a not implemented unreachable.
@ -41,8 +39,9 @@ for.end: ; preds = %for.body, %entry
}
; COMMON-LABEL: {{^}}branch_false:
; SI: .text
; SI-NEXT: s_endpgm
; SI: s_cbranch_vccnz
; SI: s_cbranch_vccnz
; SI: s_endpgm
define void @branch_false(i8 addrspace(1)* nocapture %main, i32 %main_stride) #0 {
entry:
br i1 false, label %for.end, label %for.body.lr.ph
@ -77,8 +76,9 @@ for.end: ; preds = %for.body, %entry
}
; COMMON-LABEL: {{^}}branch_undef:
; SI: .text
; SI-NEXT: s_endpgm
; SI: s_cbranch_vccnz
; SI: s_cbranch_vccnz
; SI: s_endpgm
define void @branch_undef(i8 addrspace(1)* nocapture %main, i32 %main_stride) #0 {
entry:
br i1 undef, label %for.end, label %for.body.lr.ph