1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

AMDGPU: Remove some uses of llvm.SI.export in tests

Merge some of the old, smaller tests into more complete versions.

llvm-svn: 295792
This commit is contained in:
Matt Arsenault 2017-02-22 00:02:21 +00:00
parent 65d8dccee7
commit 3320e649a3
33 changed files with 952 additions and 1065 deletions

View File

@ -3,19 +3,15 @@
; This test just checks that the compiler doesn't crash.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; FUNC-LABEL: {{^}}v32i8_to_v8i32:
; SI: s_endpgm
define amdgpu_ps void @v32i8_to_v8i32(<32 x i8> addrspace(2)* inreg) #0 {
define amdgpu_ps float @v32i8_to_v8i32(<32 x i8> addrspace(2)* inreg) #0 {
entry:
%1 = load <32 x i8>, <32 x i8> addrspace(2)* %0
%2 = bitcast <32 x i8> %1 to <8 x i32>
%3 = extractelement <8 x i32> %2, i32 1
%4 = icmp ne i32 %3, 0
%5 = select i1 %4, float 0.0, float 1.0
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %5, float %5, float %5, float %5)
ret void
ret float %5
}
; FUNC-LABEL: {{^}}i8ptr_v16i8ptr:

View File

@ -4,7 +4,7 @@
; GCN-LABEL: {{^}}main:
; SI: v_lshl_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; VI: v_lshlrev_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, 1
define amdgpu_ps void @main(float %arg0, float %arg1) #0 {
define amdgpu_ps float @main(float %arg0, float %arg1) #0 {
bb:
%tmp = fptosi float %arg0 to i32
%tmp1 = call <4 x float> @llvm.SI.image.load.v4i32(<4 x i32> undef, <8 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@ -17,13 +17,11 @@ bb:
%tmp7 = select i1 %tmp6, float 0.000000e+00, float %arg1
%tmp8 = call i32 @llvm.SI.packf16(float undef, float %tmp7)
%tmp9 = bitcast i32 %tmp8 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float undef, float %tmp9, float undef, float %tmp9)
ret void
ret float %tmp9
}
declare <4 x float> @llvm.SI.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }

View File

@ -97,18 +97,15 @@ main_body:
; GCN-LABEL: {{^}}kill_vcc_implicit_def:
; GCN: IeeeMode: 0
define amdgpu_ps void @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) {
define amdgpu_ps float @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) {
entry:
%tmp0 = fcmp olt float %13, 0.0
call void @llvm.AMDGPU.kill(float %14)
%tmp1 = select i1 %tmp0, float 1.0, float 0.0
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 1, i32 1, float %tmp1, float %tmp1, float %tmp1, float %tmp1)
ret void
ret float %tmp1
}
declare void @llvm.AMDGPU.kill(float)
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { nounwind "target-cpu"="tahiti" }
attributes #1 = { nounwind "target-cpu"="fiji" }

View File

@ -24,11 +24,13 @@
; TONGA-NEXT: .long 704
; CONFIG: .p2align 8
; CONFIG: test:
define amdgpu_ps void @test(i32 %p) {
define amdgpu_ps void @test(i32 %p) #0 {
%i = add i32 %p, 2
%r = bitcast i32 %i to float
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r, float %r, float %r, float %r, i1 true, i1 false)
ret void
}
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
attributes #0 = { nounwind }

View File

@ -667,3 +667,18 @@ define void @store_literal_imm_f64(double addrspace(1)* %out) {
store double 4096.0, double addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}literal_folding:
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3f4353f8, v{{[0-9]+}}
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0xbf4353f8, v{{[0-9]+}}
; Multiplies %arg by two float constants that differ only in their sign bit
; and passes both products to llvm.amdgcn.exp.f32. The FileCheck lines above
; this function expect each constant as a v_mul_f32_e32 literal operand.
define amdgpu_vs void @literal_folding(float %arg) {
main_body:
; The float constants are spelled in 64-bit hex form here; the expected
; instruction operands are their 32-bit encodings 0x3f4353f8 and 0xbf4353f8.
%tmp = fmul float %arg, 0x3FE86A7F00000000
%tmp1 = fmul float %arg, 0xBFE86A7F00000000
; Both products are used as export sources (%tmp twice, then %tmp1 twice).
; NOTE(review): the leading i32 12 / i32 15 and the trailing i1 operands are
; presumably the export target, the enable mask and the done/vm flags -
; confirm against the llvm.amdgcn.exp intrinsic definition.
call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %tmp, float %tmp, float %tmp1, float %tmp1, i1 true, i1 false) #0
ret void
}
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
attributes #0 = { nounwind }

View File

@ -1,18 +1,18 @@
# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-insert-waits -o - %s | FileCheck %s
--- |
define amdgpu_ps <4 x float> @exp_done_waitcnt(<4 x i32> inreg, <4 x i32> inreg, i32 inreg %w, float %v) {
define amdgpu_ps <4 x float> @exp_done_waitcnt(<4 x i32> inreg, <4 x
i32> inreg, i32 inreg %w, float %v) #0 {
%a = load volatile float, float addrspace(1)* undef
%b = load volatile float, float addrspace(1)* undef
%c = load volatile float, float addrspace(1)* undef
%d = load volatile float, float addrspace(1)* undef
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %a, float %b, float %c, float %d)
call void @llvm.amdgcn.exp.f32(i32 15, i32 1, float %a, float %b, float %c, float %d, i1 true, i1 false)
ret <4 x float> <float 5.000000e-01, float 1.000000e+00, float 2.000000e+00, float 4.000000e+00>
}
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
attributes #0 = { readnone }
attributes #1 = { nounwind }
attributes #0 = { nounwind }
...
---

View File

@ -4,15 +4,14 @@
; SI-LABEL: {{^}}kill_gs_const:
; SI-NOT: v_cmpx_le_f32
; SI: s_mov_b64 exec, 0
define amdgpu_gs void @kill_gs_const() {
main_body:
%0 = icmp ule i32 0, 3
%1 = select i1 %0, float 1.000000e+00, float -1.000000e+00
call void @llvm.AMDGPU.kill(float %1)
%2 = icmp ule i32 3, 0
%3 = select i1 %2, float 1.000000e+00, float -1.000000e+00
call void @llvm.AMDGPU.kill(float %3)
%tmp = icmp ule i32 0, 3
%tmp1 = select i1 %tmp, float 1.000000e+00, float -1.000000e+00
call void @llvm.AMDGPU.kill(float %tmp1)
%tmp2 = icmp ule i32 3, 0
%tmp3 = select i1 %tmp2, float 1.000000e+00, float -1.000000e+00
call void @llvm.AMDGPU.kill(float %tmp3)
ret void
}
@ -21,16 +20,16 @@ main_body:
; SI: v_cmp_gt_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], 0, v{{[0-9]+}}
; SI: v_cmpx_le_f32_e32 vcc, 0, v{{[0-9]+}}
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1.0, [[CMP]]
define amdgpu_ps void @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) {
define amdgpu_ps void @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) {
entry:
%tmp0 = fcmp olt float %13, 0.0
call void @llvm.AMDGPU.kill(float %14)
%tmp1 = select i1 %tmp0, float 1.0, float 0.0
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 1, i32 1, float %tmp1, float %tmp1, float %tmp1, float %tmp1)
%tmp0 = fcmp olt float %arg13, 0.000000e+00
call void @llvm.AMDGPU.kill(float %arg14)
%tmp1 = select i1 %tmp0, float 1.000000e+00, float 0.000000e+00
call void @llvm.amdgcn.exp.f32(i32 1, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) #0
ret void
}
declare void @llvm.AMDGPU.kill(float)
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
declare void @llvm.AMDGPU.kill(float) #0
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
!0 = !{!"const", null, i32 1}
attributes #0 = { nounwind }

View File

@ -1,146 +1,144 @@
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefixes=CHECK,VI %s
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
;CHECK-LABEL: {{^}}image_load_v4i32:
;CHECK: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm
;CHECK: s_waitcnt vmcnt(0)
define amdgpu_ps <4 x float> @image_load_v4i32(<8 x i32> inreg %rsrc, <4 x i32> %c) {
; GCN-LABEL: {{^}}image_load_v4i32:
; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm
; GCN: s_waitcnt vmcnt(0)
define amdgpu_ps <4 x float> @image_load_v4i32(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 {
main_body:
%tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
%tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
ret <4 x float> %tex
}
;CHECK-LABEL: {{^}}image_load_v2i32:
;CHECK: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm
;CHECK: s_waitcnt vmcnt(0)
define amdgpu_ps <4 x float> @image_load_v2i32(<8 x i32> inreg %rsrc, <2 x i32> %c) {
; GCN-LABEL: {{^}}image_load_v2i32:
; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm
; GCN: s_waitcnt vmcnt(0)
define amdgpu_ps <4 x float> @image_load_v2i32(<8 x i32> inreg %rsrc, <2 x i32> %c) #0 {
main_body:
%tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
%tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
ret <4 x float> %tex
}
;CHECK-LABEL: {{^}}image_load_i32:
;CHECK: image_load v[0:3], v0, s[0:7] dmask:0xf unorm
;CHECK: s_waitcnt vmcnt(0)
define amdgpu_ps <4 x float> @image_load_i32(<8 x i32> inreg %rsrc, i32 %c) {
; GCN-LABEL: {{^}}image_load_i32:
; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm
; GCN: s_waitcnt vmcnt(0)
define amdgpu_ps <4 x float> @image_load_i32(<8 x i32> inreg %rsrc, i32 %c) #0 {
main_body:
%tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
%tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
ret <4 x float> %tex
}
;CHECK-LABEL: {{^}}image_load_mip:
;CHECK: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm
;CHECK: s_waitcnt vmcnt(0)
define amdgpu_ps <4 x float> @image_load_mip(<8 x i32> inreg %rsrc, <4 x i32> %c) {
; GCN-LABEL: {{^}}image_load_mip:
; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm
; GCN: s_waitcnt vmcnt(0)
define amdgpu_ps <4 x float> @image_load_mip(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 {
main_body:
%tex = call <4 x float> @llvm.amdgcn.image.load.mip.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
%tex = call <4 x float> @llvm.amdgcn.image.load.mip.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
ret <4 x float> %tex
}
;CHECK-LABEL: {{^}}image_load_1:
;CHECK: image_load v0, v[0:3], s[0:7] dmask:0x1 unorm
;CHECK: s_waitcnt vmcnt(0)
define amdgpu_ps float @image_load_1(<8 x i32> inreg %rsrc, <4 x i32> %c) {
; GCN-LABEL: {{^}}image_load_1:
; GCN: image_load v0, v[0:3], s[0:7] dmask:0x1 unorm
; GCN: s_waitcnt vmcnt(0)
define amdgpu_ps float @image_load_1(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 {
main_body:
%tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
%tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
%elt = extractelement <4 x float> %tex, i32 0
; Only the first component is used; test that dmask etc. are changed accordingly.
ret float %elt
}
;CHECK-LABEL: {{^}}image_load_f32_v2i32:
;CHECK: image_load {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 unorm
;CHECK: s_waitcnt vmcnt(0)
define amdgpu_ps float @image_load_f32_v2i32(<8 x i32> inreg %rsrc, <2 x i32> %c) {
; GCN-LABEL: {{^}}image_load_f32_v2i32:
; GCN: image_load {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 unorm
; GCN: s_waitcnt vmcnt(0)
define amdgpu_ps float @image_load_f32_v2i32(<8 x i32> inreg %rsrc, <2 x i32> %c) #0 {
main_body:
%tex = call float @llvm.amdgcn.image.load.f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 1, i1 0, i1 0, i1 0, i1 0)
%tex = call float @llvm.amdgcn.image.load.f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false)
ret float %tex
}
;CHECK-LABEL: {{^}}image_load_v2f32_v4i32:
;CHECK: image_load {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 unorm
;CHECK: s_waitcnt vmcnt(0)
define amdgpu_ps <2 x float> @image_load_v2f32_v4i32(<8 x i32> inreg %rsrc, <4 x i32> %c) {
; GCN-LABEL: {{^}}image_load_v2f32_v4i32:
; GCN: image_load {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 unorm
; GCN: s_waitcnt vmcnt(0)
define amdgpu_ps <2 x float> @image_load_v2f32_v4i32(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 {
main_body:
%tex = call <2 x float> @llvm.amdgcn.image.load.v2f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 3, i1 0, i1 0, i1 0, i1 0)
%tex = call <2 x float> @llvm.amdgcn.image.load.v2f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false)
ret <2 x float> %tex
}
;CHECK-LABEL: {{^}}image_store_v4i32:
;CHECK: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm
define amdgpu_ps void @image_store_v4i32(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) {
; GCN-LABEL: {{^}}image_store_v4i32:
; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm
define amdgpu_ps void @image_store_v4i32(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) #0 {
main_body:
call void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
call void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
ret void
}
;CHECK-LABEL: {{^}}image_store_v2i32:
;CHECK: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm
define amdgpu_ps void @image_store_v2i32(<8 x i32> inreg %rsrc, <4 x float> %data, <2 x i32> %coords) {
; GCN-LABEL: {{^}}image_store_v2i32:
; GCN: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm
define amdgpu_ps void @image_store_v2i32(<8 x i32> inreg %rsrc, <4 x float> %data, <2 x i32> %coords) #0 {
main_body:
call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %data, <2 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %data, <2 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
ret void
}
;CHECK-LABEL: {{^}}image_store_i32:
;CHECK: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
define amdgpu_ps void @image_store_i32(<8 x i32> inreg %rsrc, <4 x float> %data, i32 %coords) {
; GCN-LABEL: {{^}}image_store_i32:
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
define amdgpu_ps void @image_store_i32(<8 x i32> inreg %rsrc, <4 x float> %data, i32 %coords) #0 {
main_body:
call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %data, i32 %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %data, i32 %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
ret void
}
;CHECK-LABEL: {{^}}image_store_f32_i32:
;CHECK: image_store {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 unorm
define amdgpu_ps void @image_store_f32_i32(<8 x i32> inreg %rsrc, float %data, i32 %coords) {
; GCN-LABEL: {{^}}image_store_f32_i32:
; GCN: image_store {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 unorm
define amdgpu_ps void @image_store_f32_i32(<8 x i32> inreg %rsrc, float %data, i32 %coords) #0 {
main_body:
call void @llvm.amdgcn.image.store.f32.i32.v8i32(float %data, i32 %coords, <8 x i32> %rsrc, i32 1, i1 0, i1 0, i1 0, i1 0)
call void @llvm.amdgcn.image.store.f32.i32.v8i32(float %data, i32 %coords, <8 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false)
ret void
}
;CHECK-LABEL: {{^}}image_store_v2f32_v4i32:
;CHECK: image_store {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 unorm
define amdgpu_ps void @image_store_v2f32_v4i32(<8 x i32> inreg %rsrc, <2 x float> %data, <4 x i32> %coords) {
; GCN-LABEL: {{^}}image_store_v2f32_v4i32:
; GCN: image_store {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 unorm
define amdgpu_ps void @image_store_v2f32_v4i32(<8 x i32> inreg %rsrc, <2 x float> %data, <4 x i32> %coords) #0 {
main_body:
call void @llvm.amdgcn.image.store.v2f32.v4i32.v8i32(<2 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 3, i1 0, i1 0, i1 0, i1 0)
call void @llvm.amdgcn.image.store.v2f32.v4i32.v8i32(<2 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false)
ret void
}
;CHECK-LABEL: {{^}}image_store_mip:
;CHECK: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm
define amdgpu_ps void @image_store_mip(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) {
; GCN-LABEL: {{^}}image_store_mip:
; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm
define amdgpu_ps void @image_store_mip(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) #0 {
main_body:
call void @llvm.amdgcn.image.store.mip.v4f32.v4i32.v8i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
call void @llvm.amdgcn.image.store.mip.v4f32.v4i32.v8i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
ret void
}
;CHECK-LABEL: {{^}}getresinfo:
;CHECK: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
define amdgpu_ps void @getresinfo() {
; GCN-LABEL: {{^}}getresinfo:
; GCN: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
define amdgpu_ps void @getresinfo() #0 {
main_body:
%r = call <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32 undef, <8 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0)
%r = call <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32 undef, <8 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false)
%r0 = extractelement <4 x float> %r, i32 0
%r1 = extractelement <4 x float> %r, i32 1
%r2 = extractelement <4 x float> %r, i32 2
%r3 = extractelement <4 x float> %r, i32 3
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r0, float %r1, float %r2, float %r3, i1 true, i1 true) #0
ret void
}
; Ideally, the register allocator would avoid the wait here
;
;CHECK-LABEL: {{^}}image_store_wait:
;CHECK: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
;CHECK: s_waitcnt vmcnt(0) expcnt(0)
;CHECK: image_load v[0:3], v4, s[8:15] dmask:0xf unorm
;CHECK: s_waitcnt vmcnt(0)
;CHECK: image_store v[0:3], v4, s[16:23] dmask:0xf unorm
define amdgpu_ps void @image_store_wait(<8 x i32> inreg, <8 x i32> inreg, <8 x i32> inreg, <4 x float>, i32) {
; GCN-LABEL: {{^}}image_store_wait:
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
; GCN: s_waitcnt vmcnt(0) expcnt(0)
; GCN: image_load v[0:3], v4, s[8:15] dmask:0xf unorm
; GCN: s_waitcnt vmcnt(0)
; GCN: image_store v[0:3], v4, s[16:23] dmask:0xf unorm
define amdgpu_ps void @image_store_wait(<8 x i32> inreg %arg, <8 x i32> inreg %arg1, <8 x i32> inreg %arg2, <4 x float> %arg3, i32 %arg4) #0 {
main_body:
call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %3, i32 %4, <8 x i32> %0, i32 15, i1 0, i1 0, i1 0, i1 0)
%data = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %4, <8 x i32> %1, i32 15, i1 0, i1 0, i1 0, i1 0)
call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %data, i32 %4, <8 x i32> %2, i32 15, i1 0, i1 0, i1 0, i1 0)
call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %arg3, i32 %arg4, <8 x i32> %arg, i32 15, i1 false, i1 false, i1 false, i1 false)
%data = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %arg4, <8 x i32> %arg1, i32 15, i1 false, i1 false, i1 false, i1 false)
call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %data, i32 %arg4, <8 x i32> %arg2, i32 15, i1 false, i1 false, i1 false, i1 false)
ret void
}
@ -149,21 +147,22 @@ main_body:
; VI-LABEL: image_load_mmo
; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
define amdgpu_ps void @image_load_mmo(float addrspace(3)* %lds, <2 x i32> %c, <8 x i32> inreg %rsrc) {
store float 0.0, float addrspace(3)* %lds
%tex = call float @llvm.amdgcn.image.load.f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
define amdgpu_ps void @image_load_mmo(float addrspace(3)* %lds, <2 x i32> %c, <8 x i32> inreg %rsrc) #0 {
bb:
store float 0.000000e+00, float addrspace(3)* %lds
%tex = call float @llvm.amdgcn.image.load.f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
%tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
store float 0.0, float addrspace(3)* %tmp2
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tex, float %tex, float %tex, float %tex)
store float 0.000000e+00, float addrspace(3)* %tmp2
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tex, float %tex, float %tex, float %tex, i1 true, i1 true) #0
ret void
}
declare float @llvm.amdgcn.image.load.f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
declare <2 x float> @llvm.amdgcn.image.load.v2f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
declare void @llvm.amdgcn.image.store.f32.i32.v8i32(float, i32, <8 x i32>, i32, i1, i1, i1, i1) #0
declare void @llvm.amdgcn.image.store.v2f32.v4i32.v8i32(<2 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0
declare void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float>, i32, <8 x i32>, i32, i1, i1, i1, i1) #0
declare void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float>, <2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0
declare void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0
@ -173,10 +172,9 @@ declare <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32, <8 x i32>, i32,
declare <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
declare <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
declare <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #1
declare <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #0
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }

View File

@ -3,7 +3,6 @@
; RUN: llc -march=amdgcn -mcpu=kabini -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,16BANK %s
; RUN: llc -march=amdgcn -mcpu=stoney -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,16BANK %s
; GCN-LABEL: {{^}}v_interp:
; GCN-NOT: s_wqm
; GCN: s_mov_b32 m0, s{{[0-9]+}}
@ -11,17 +10,17 @@
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.y{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.y{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p0, attr0.x{{$}}
define amdgpu_ps void @v_interp(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x float>) {
define amdgpu_ps void @v_interp(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x float> %arg4) #0 {
main_body:
%i = extractelement <2 x float> %4, i32 0
%j = extractelement <2 x float> %4, i32 1
%p0_0 = call float @llvm.amdgcn.interp.p1(float %i, i32 0, i32 0, i32 %3)
%p1_0 = call float @llvm.amdgcn.interp.p2(float %p0_0, float %j, i32 0, i32 0, i32 %3)
%p0_1 = call float @llvm.amdgcn.interp.p1(float %i, i32 1, i32 0, i32 %3)
%p1_1 = call float @llvm.amdgcn.interp.p2(float %p0_1, float %j, i32 1, i32 0, i32 %3)
%const = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %3)
%i = extractelement <2 x float> %arg4, i32 0
%j = extractelement <2 x float> %arg4, i32 1
%p0_0 = call float @llvm.amdgcn.interp.p1(float %i, i32 0, i32 0, i32 %arg3)
%p1_0 = call float @llvm.amdgcn.interp.p2(float %p0_0, float %j, i32 0, i32 0, i32 %arg3)
%p0_1 = call float @llvm.amdgcn.interp.p1(float %i, i32 1, i32 0, i32 %arg3)
%p1_1 = call float @llvm.amdgcn.interp.p2(float %p0_1, float %j, i32 1, i32 0, i32 %arg3)
%const = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %arg3)
%w = fadd float %p1_1, %const
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %p0_0, float %p0_0, float %p1_1, float %w)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_0, float %p0_0, float %p1_1, float %w, i1 true, i1 true) #0
ret void
}
@ -40,7 +39,8 @@ main_body:
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr63.w{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr64.w{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr64.x{{$}}
define amdgpu_ps void @v_interp_p1(float %i) {
define amdgpu_ps void @v_interp_p1(float %i) #0 {
bb:
%p0_0 = call float @llvm.amdgcn.interp.p1(float %i, i32 0, i32 0, i32 256)
%p0_1 = call float @llvm.amdgcn.interp.p1(float %i, i32 1, i32 0, i32 256)
%p0_2 = call float @llvm.amdgcn.interp.p1(float %i, i32 2, i32 0, i32 256)
@ -80,7 +80,8 @@ define amdgpu_ps void @v_interp_p1(float %i) {
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr63.x{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr64.x{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr64.x{{$}}
define amdgpu_ps void @v_interp_p2(float %x, float %j) {
define amdgpu_ps void @v_interp_p2(float %x, float %j) #0 {
bb:
%p2_0 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 0, i32 0, i32 256)
%p2_1 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 1, i32 0, i32 256)
%p2_2 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 2, i32 0, i32 256)
@ -121,7 +122,8 @@ define amdgpu_ps void @v_interp_p2(float %x, float %j) {
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p10, attr64.y{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, invalid_param_3, attr64.y{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, invalid_param_10, attr64.x{{$}}
define amdgpu_ps void @v_interp_mov(float %x, float %j) {
define amdgpu_ps void @v_interp_mov(float %x, float %j) #0 {
bb:
%mov_0 = call float @llvm.amdgcn.interp.mov(i32 0, i32 0, i32 0, i32 256)
%mov_1 = call float @llvm.amdgcn.interp.mov(i32 1, i32 0, i32 0, i32 256)
%mov_2 = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 256)
@ -164,12 +166,13 @@ define amdgpu_ps void @v_interp_mov(float %x, float %j) {
; VI-DAG: v_interp_mov_f32 v{{[0-9]+}}, p0, attr0.x{{$}}
; VI: s_mov_b32 m0, -1{{$}}
; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
define amdgpu_ps void @v_interp_readnone(float addrspace(3)* %lds) {
store float 0.0, float addrspace(3)* %lds
define amdgpu_ps void @v_interp_readnone(float addrspace(3)* %lds) #0 {
bb:
store float 0.000000e+00, float addrspace(3)* %lds
%tmp1 = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 0)
%tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
store float 0.0, float addrspace(3)* %tmp2
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp1, float %tmp1, float %tmp1, float %tmp1)
store float 0.000000e+00, float addrspace(3)* %tmp2
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) #0
ret void
}
@ -178,43 +181,44 @@ define amdgpu_ps void @v_interp_readnone(float addrspace(3)* %lds) {
; GCN-LABEL: {{^}}v_interp_p1_bank16_bug:
; 16BANK-NOT: v_interp_p1_f32 [[DST:v[0-9]+]], [[DST]]
define amdgpu_ps void @v_interp_p1_bank16_bug([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg13, [17 x <4 x i32>] addrspace(2)* byval %arg14, [34 x <8 x i32>] addrspace(2)* byval %arg15, float inreg %arg16, i32 inreg %arg17, <2 x i32> %arg18, <2 x i32> %arg19, <2 x i32> %arg20, <3 x i32> %arg21, <2 x i32> %arg22, <2 x i32> %arg23, <2 x i32> %arg24, float %arg25, float %arg26, float %arg27, float %arg28, float %arg29, float %arg30, i32 %arg31, float %arg32, float %arg33) {
define amdgpu_ps void @v_interp_p1_bank16_bug([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg13, [17 x <4 x i32>] addrspace(2)* byval %arg14, [34 x <8 x i32>] addrspace(2)* byval %arg15, float inreg %arg16, i32 inreg %arg17, <2 x i32> %arg18, <2 x i32> %arg19, <2 x i32> %arg20, <3 x i32> %arg21, <2 x i32> %arg22, <2 x i32> %arg23, <2 x i32> %arg24, float %arg25, float %arg26, float %arg27, float %arg28, float %arg29, float %arg30, i32 %arg31, float %arg32, float %arg33) #0 {
main_body:
%i.i = extractelement <2 x i32> %arg19, i32 0
%j.i = extractelement <2 x i32> %arg19, i32 1
%i.f.i = bitcast i32 %i.i to float
%j.f.i = bitcast i32 %j.i to float
%p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg17) #1
%p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg17) #1
%p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg17) #0
%p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg17) #0
%i.i7 = extractelement <2 x i32> %arg19, i32 0
%j.i8 = extractelement <2 x i32> %arg19, i32 1
%i.f.i9 = bitcast i32 %i.i7 to float
%j.f.i10 = bitcast i32 %j.i8 to float
%p1.i11 = call float @llvm.amdgcn.interp.p1(float %i.f.i9, i32 1, i32 0, i32 %arg17) #1
%p2.i12 = call float @llvm.amdgcn.interp.p2(float %p1.i11, float %j.f.i10, i32 1, i32 0, i32 %arg17) #1
%p1.i11 = call float @llvm.amdgcn.interp.p1(float %i.f.i9, i32 1, i32 0, i32 %arg17) #0
%p2.i12 = call float @llvm.amdgcn.interp.p2(float %p1.i11, float %j.f.i10, i32 1, i32 0, i32 %arg17) #0
%i.i1 = extractelement <2 x i32> %arg19, i32 0
%j.i2 = extractelement <2 x i32> %arg19, i32 1
%i.f.i3 = bitcast i32 %i.i1 to float
%j.f.i4 = bitcast i32 %j.i2 to float
%p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 2, i32 0, i32 %arg17) #1
%p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 2, i32 0, i32 %arg17) #1
%p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 2, i32 0, i32 %arg17) #0
%p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 2, i32 0, i32 %arg17) #0
%tmp = call float @llvm.fabs.f32(float %p2.i)
%tmp34 = call float @llvm.fabs.f32(float %p2.i12)
%tmp35 = call float @llvm.fabs.f32(float %p2.i6)
%tmp36 = call i32 @llvm.SI.packf16(float %tmp, float %tmp34)
%tmp37 = bitcast i32 %tmp36 to float
%tmp37 = bitcast i32 %tmp36 to <2 x half>
%tmp38 = call i32 @llvm.SI.packf16(float %tmp35, float 1.000000e+00)
%tmp39 = bitcast i32 %tmp38 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp37, float %tmp39, float %tmp37, float %tmp39)
%tmp39 = bitcast i32 %tmp38 to <2 x half>
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp37, <2 x half> %tmp39, i1 true, i1 true) #0
ret void
}
declare float @llvm.fabs.f32(float) #0
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #0
declare i32 @llvm.SI.packf16(float, float) #0
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
declare float @llvm.fabs.f32(float) #1
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
declare i32 @llvm.SI.packf16(float, float) #1
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }

View File

@ -1,24 +1,22 @@
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; GCN-LABEL: {{^}}mbcnt_intrinsics:
; GCN: v_mbcnt_lo_u32_b32_e64 [[LO:v[0-9]+]], -1, 0
; SI: v_mbcnt_hi_u32_b32_e32 {{v[0-9]+}}, -1, [[LO]]
; VI: v_mbcnt_hi_u32_b32_e64 {{v[0-9]+}}, -1, [[LO]]
define amdgpu_ps void @mbcnt_intrinsics(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) {
define amdgpu_ps void @mbcnt_intrinsics(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3) {
main_body:
%lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #1
%hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo) #1
%4 = bitcast i32 %hi to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %4, float %4, float %4, float %4)
%lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo) #0
%tmp = bitcast i32 %hi to float
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp, float %tmp, float %tmp, float %tmp, i1 true, i1 true) #1
ret void
}
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #1 = { nounwind readnone }
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }

View File

@ -1,15 +0,0 @@
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
;RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s
;CHECK: s_lshl_b32 s{{[0-9]}}, s{{[0-9]}}, 1
; Multiplies the argument %p by 2 and passes the result, bitcast to float, as
; all four float operands of llvm.SI.export. The file's CHECK line expects an
; s_lshl_b32 by 1 for this.
define void @test(i32 %p) {
%i = mul i32 %p, 2
; Reinterpret the integer bits as float, since the export call takes floats.
%r = bitcast i32 %i to float
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
ret void
}
declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

View File

@ -1,15 +0,0 @@
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
;RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s
;CHECK: s_lshr_b32 s{{[0-9]}}, s{{[0-9]}}, 1
; Unsigned-divides the argument %p by 2 and passes the result, bitcast to
; float, as all four float operands of llvm.SI.export. The file's CHECK line
; expects an s_lshr_b32 by 1 for this.
define void @test(i32 %p) {
%i = udiv i32 %p, 2
; Reinterpret the integer bits as float, since the export call takes floats.
%r = bitcast i32 %i to float
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
ret void
}
declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

View File

@ -1,17 +0,0 @@
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
;CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0xaaaaaaab
;CHECK: v_mul_hi_u32 v0, {{v[0-9]+}}, {{s[0-9]+}}
;CHECK-NEXT: v_lshrrev_b32_e32 v0, 1, v0
; Unsigned-divides the argument %p by 3 and passes the result, bitcast to
; float, as all four float operands of llvm.SI.export. The file's CHECK lines
; expect a v_mov_b32 of the constant 0xaaaaaaab, a v_mul_hi_u32, and then a
; v_lshrrev_b32 by 1.
define void @test(i32 %p) {
%i = udiv i32 %p, 3
; Reinterpret the integer bits as float, since the export call takes floats.
%r = bitcast i32 %i to float
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
ret void
}
declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

View File

@ -1,25 +1,24 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; GCN-LABEL: {{^}}vgpr:
; GCN: v_mov_b32_e32 v1, v0
; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
; GCN-DAG: exp mrt0 v1, v1, v1, v1 done compr vm
; GCN-DAG: exp mrt0 v1, v1, v1, v1 done vm
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
define amdgpu_vs {float, float} @vgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
%x = fadd float %3, 1.0
%a = insertvalue {float, float} undef, float %x, 0
%b = insertvalue {float, float} %a, float %3, 1
ret {float, float} %b
define amdgpu_vs { float, float } @vgpr([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
%x = fadd float %arg3, 1.000000e+00
%a = insertvalue { float, float } undef, float %x, 0
%b = insertvalue { float, float } %a, float %arg3, 1
ret { float, float } %b
}
; GCN-LABEL: {{^}}vgpr_literal:
; GCN: v_mov_b32_e32 v4, v0
; GCN: exp mrt0 v4, v4, v4, v4 done compr vm
; GCN: exp mrt0 v4, v4, v4, v4 done vm
; GCN-DAG: v_mov_b32_e32 v0, 1.0
; GCN-DAG: v_mov_b32_e32 v1, 2.0
@ -27,12 +26,12 @@ define amdgpu_vs {float, float} @vgpr([9 x <16 x i8>] addrspace(2)* byval, i32 i
; GCN-DAG: v_mov_b32_e32 v3, -1.0
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
define amdgpu_vs {float, float, float, float} @vgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
ret {float, float, float, float} {float 1.0, float 2.0, float 4.0, float -1.0}
define amdgpu_vs { float, float, float, float } @vgpr_literal([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
ret { float, float, float, float } { float 1.000000e+00, float 2.000000e+00, float 4.000000e+00, float -1.000000e+00 }
}
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
@ -44,24 +43,24 @@ define amdgpu_vs {float, float, float, float} @vgpr_literal([9 x <16 x i8>] addr
; GCN: v_mov_b32_e32 v3, v4
; GCN: v_mov_b32_e32 v4, v6
; GCN-NOT: s_endpgm
define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr0([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
%i0 = extractelement <2 x i32> %4, i32 0
%i1 = extractelement <2 x i32> %4, i32 1
%i2 = extractelement <2 x i32> %7, i32 0
%i3 = extractelement <2 x i32> %8, i32 0
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr0([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
bb:
%i0 = extractelement <2 x i32> %arg4, i32 0
%i1 = extractelement <2 x i32> %arg4, i32 1
%i2 = extractelement <2 x i32> %arg7, i32 0
%i3 = extractelement <2 x i32> %arg8, i32 0
%f0 = bitcast i32 %i0 to float
%f1 = bitcast i32 %i1 to float
%f2 = bitcast i32 %i2 to float
%f3 = bitcast i32 %i3 to float
%r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
%r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
%r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
%r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
%r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
ret {float, float, float, float, float} %r4
%r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
%r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
%r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
%r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
%r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
ret { float, float, float, float, float } %r4
}
; GCN: .long 165580
; GCN-NEXT: .long 1
; GCN-NEXT: .long 165584
@ -69,11 +68,11 @@ define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr0([9 x <16 x i
; GCN-LABEL: {{^}}ps_input_ena_no_inputs:
; GCN: v_mov_b32_e32 v0, 1.0
; GCN-NOT: s_endpgm
define amdgpu_ps float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
ret float 1.0
define amdgpu_ps float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
bb:
ret float 1.000000e+00
}
; GCN: .long 165580
; GCN-NEXT: .long 2081
; GCN-NEXT: .long 165584
@ -83,14 +82,14 @@ define amdgpu_ps float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(2)* byv
; GCN-DAG: v_mov_b32_e32 v1, v2
; GCN: v_mov_b32_e32 v2, v3
; GCN-NOT: s_endpgm
define amdgpu_ps {float, <2 x float>} @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
%f = bitcast <2 x i32> %8 to <2 x float>
%s = insertvalue {float, <2 x float>} undef, float %14, 0
%s1 = insertvalue {float, <2 x float>} %s, <2 x float> %f, 1
ret {float, <2 x float>} %s1
define amdgpu_ps { float, <2 x float> } @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
bb:
%f = bitcast <2 x i32> %arg8 to <2 x float>
%s = insertvalue { float, <2 x float> } undef, float %arg14, 0
%s1 = insertvalue { float, <2 x float> } %s, <2 x float> %f, 1
ret { float, <2 x float> } %s1
}
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
@ -102,25 +101,24 @@ define amdgpu_ps {float, <2 x float>} @ps_input_ena_pos_w([9 x <16 x i8>] addrsp
; GCN-DAG: v_mov_b32_e32 v3, v6
; GCN-DAG: v_mov_b32_e32 v4, v8
; GCN-NOT: s_endpgm
attributes #1 = { "InitialPSInputAddr"="1" }
define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr1([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #1 {
%i0 = extractelement <2 x i32> %4, i32 0
%i1 = extractelement <2 x i32> %4, i32 1
%i2 = extractelement <2 x i32> %7, i32 0
%i3 = extractelement <2 x i32> %8, i32 0
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr1([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #2 {
bb:
%i0 = extractelement <2 x i32> %arg4, i32 0
%i1 = extractelement <2 x i32> %arg4, i32 1
%i2 = extractelement <2 x i32> %arg7, i32 0
%i3 = extractelement <2 x i32> %arg8, i32 0
%f0 = bitcast i32 %i0 to float
%f1 = bitcast i32 %i1 to float
%f2 = bitcast i32 %i2 to float
%f3 = bitcast i32 %i3 to float
%r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
%r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
%r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
%r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
%r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
ret {float, float, float, float, float} %r4
%r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
%r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
%r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
%r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
%r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
ret { float, float, float, float, float } %r4
}
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
@ -132,25 +130,24 @@ define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr1([9 x <16 x i
; GCN: v_mov_b32_e32 v3, v8
; GCN: v_mov_b32_e32 v4, v12
; GCN-NOT: s_endpgm
attributes #2 = { "InitialPSInputAddr"="119" }
define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr119([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #2 {
%i0 = extractelement <2 x i32> %4, i32 0
%i1 = extractelement <2 x i32> %4, i32 1
%i2 = extractelement <2 x i32> %7, i32 0
%i3 = extractelement <2 x i32> %8, i32 0
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr119([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #3 {
bb:
%i0 = extractelement <2 x i32> %arg4, i32 0
%i1 = extractelement <2 x i32> %arg4, i32 1
%i2 = extractelement <2 x i32> %arg7, i32 0
%i3 = extractelement <2 x i32> %arg8, i32 0
%f0 = bitcast i32 %i0 to float
%f1 = bitcast i32 %i1 to float
%f2 = bitcast i32 %i2 to float
%f3 = bitcast i32 %i3 to float
%r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
%r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
%r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
%r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
%r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
ret {float, float, float, float, float} %r4
%r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
%r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
%r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
%r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
%r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
ret { float, float, float, float, float } %r4
}
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
@ -162,38 +159,37 @@ define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr119([9 x <16 x
; GCN: v_mov_b32_e32 v3, v4
; GCN: v_mov_b32_e32 v4, v8
; GCN-NOT: s_endpgm
attributes #3 = { "InitialPSInputAddr"="418" }
define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr418([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #3 {
%i0 = extractelement <2 x i32> %4, i32 0
%i1 = extractelement <2 x i32> %4, i32 1
%i2 = extractelement <2 x i32> %7, i32 0
%i3 = extractelement <2 x i32> %8, i32 0
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr418([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #4 {
bb:
%i0 = extractelement <2 x i32> %arg4, i32 0
%i1 = extractelement <2 x i32> %arg4, i32 1
%i2 = extractelement <2 x i32> %arg7, i32 0
%i3 = extractelement <2 x i32> %arg8, i32 0
%f0 = bitcast i32 %i0 to float
%f1 = bitcast i32 %i1 to float
%f2 = bitcast i32 %i2 to float
%f3 = bitcast i32 %i3 to float
%r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
%r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
%r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
%r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
%r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
ret {float, float, float, float, float} %r4
%r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
%r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
%r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
%r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
%r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
ret { float, float, float, float, float } %r4
}
; GCN-LABEL: {{^}}sgpr:
; GCN: s_add_i32 s0, s3, 2
; GCN: s_mov_b32 s2, s3
; GCN-NOT: s_endpgm
define amdgpu_vs {i32, i32, i32} @sgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
%x = add i32 %2, 2
%a = insertvalue {i32, i32, i32} undef, i32 %x, 0
%b = insertvalue {i32, i32, i32} %a, i32 %1, 1
%c = insertvalue {i32, i32, i32} %a, i32 %2, 2
ret {i32, i32, i32} %c
define amdgpu_vs { i32, i32, i32 } @sgpr([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
%x = add i32 %arg2, 2
%a = insertvalue { i32, i32, i32 } undef, i32 %x, 0
%b = insertvalue { i32, i32, i32 } %a, i32 %arg1, 1
%c = insertvalue { i32, i32, i32 } %a, i32 %arg2, 2
ret { i32, i32, i32 } %c
}
; GCN-LABEL: {{^}}sgpr_literal:
; GCN: s_mov_b32 s0, 5
; GCN-NOT: s_mov_b32 s0, s0
@ -201,37 +197,37 @@ define amdgpu_vs {i32, i32, i32} @sgpr([9 x <16 x i8>] addrspace(2)* byval, i32
; GCN-DAG: s_mov_b32 s2, 7
; GCN-DAG: s_mov_b32 s3, 8
; GCN-NOT: s_endpgm
define amdgpu_vs {i32, i32, i32, i32} @sgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
%x = add i32 %2, 2
ret {i32, i32, i32, i32} {i32 5, i32 6, i32 7, i32 8}
define amdgpu_vs { i32, i32, i32, i32 } @sgpr_literal([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
%x = add i32 %arg2, 2
ret { i32, i32, i32, i32 } { i32 5, i32 6, i32 7, i32 8 }
}
; GCN-LABEL: {{^}}both:
; GCN: v_mov_b32_e32 v1, v0
; GCN-DAG: exp mrt0 v1, v1, v1, v1 done compr vm
; GCN-DAG: exp mrt0 v1, v1, v1, v1 done vm
; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
; GCN-DAG: s_add_i32 s0, s3, 2
; GCN-DAG: s_mov_b32 s1, s2
; GCN: s_mov_b32 s2, s3
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
define amdgpu_vs {float, i32, float, i32, i32} @both([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
%v = fadd float %3, 1.0
%s = add i32 %2, 2
%a0 = insertvalue {float, i32, float, i32, i32} undef, float %v, 0
%a1 = insertvalue {float, i32, float, i32, i32} %a0, i32 %s, 1
%a2 = insertvalue {float, i32, float, i32, i32} %a1, float %3, 2
%a3 = insertvalue {float, i32, float, i32, i32} %a2, i32 %1, 3
%a4 = insertvalue {float, i32, float, i32, i32} %a3, i32 %2, 4
ret {float, i32, float, i32, i32} %a4
define amdgpu_vs { float, i32, float, i32, i32 } @both([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
%v = fadd float %arg3, 1.000000e+00
%s = add i32 %arg2, 2
%a0 = insertvalue { float, i32, float, i32, i32 } undef, float %v, 0
%a1 = insertvalue { float, i32, float, i32, i32 } %a0, i32 %s, 1
%a2 = insertvalue { float, i32, float, i32, i32 } %a1, float %arg3, 2
%a3 = insertvalue { float, i32, float, i32, i32 } %a2, i32 %arg1, 3
%a4 = insertvalue { float, i32, float, i32, i32 } %a3, i32 %arg2, 4
ret { float, i32, float, i32, i32 } %a4
}
; GCN-LABEL: {{^}}structure_literal:
; GCN: v_mov_b32_e32 v3, v0
; GCN: exp mrt0 v3, v3, v3, v3 done compr vm
; GCN: exp mrt0 v3, v3, v3, v3 done vm
; GCN-DAG: v_mov_b32_e32 v0, 1.0
; GCN-DAG: s_mov_b32 s0, 2
@ -239,9 +235,16 @@ define amdgpu_vs {float, i32, float, i32, i32} @both([9 x <16 x i8>] addrspace(2
; GCN-DAG: v_mov_b32_e32 v1, 2.0
; GCN-DAG: v_mov_b32_e32 v2, 4.0
; GCN: s_waitcnt expcnt(0)
define amdgpu_vs {{float, i32}, {i32, <2 x float>}} @structure_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
ret {{float, i32}, {i32, <2 x float>}} {{float, i32} {float 1.0, i32 2}, {i32, <2 x float>} {i32 3, <2 x float> <float 2.0, float 4.0>}}
define amdgpu_vs { { float, i32 }, { i32, <2 x float> } } @structure_literal([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
ret { { float, i32 }, { i32, <2 x float> } } { { float, i32 } { float 1.000000e+00, i32 2 }, { i32, <2 x float> } { i32 3, <2 x float> <float 2.000000e+00, float 4.000000e+00> } }
}
attributes #0 = { nounwind "InitialPSInputAddr"="0" }
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
attributes #0 = { nounwind }
attributes #1 = { nounwind "InitialPSInputAddr"="0" }
attributes #2 = { nounwind "InitialPSInputAddr"="1" }
attributes #3 = { nounwind "InitialPSInputAddr"="119" }
attributes #4 = { nounwind "InitialPSInputAddr"="418" }

View File

@ -4,12 +4,9 @@
; CHECK-LABEL: {{^}}main:
; CHECK: v_cmp_o_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
; CHECK-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
define void @main(float %p) {
define amdgpu_ps float @main(float inreg %p) {
main_body:
%c = fcmp oeq float %p, %p
%r = select i1 %c, float 1.000000e+00, float 0.000000e+00
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %r, float %r, float %r, float %r)
ret void
ret float %r
}
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

View File

@ -4,12 +4,9 @@
; CHECK-LABEL: {{^}}main:
; CHECK: v_cmp_u_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
; CHECK-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
define void @main(float %p) {
define amdgpu_ps float @main(float inreg %p) {
main_body:
%c = fcmp une float %p, %p
%r = select i1 %c, float 1.000000e+00, float 0.000000e+00
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %r, float %r, float %r, float %r)
ret void
ret float %r
}
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

View File

@ -1,13 +1,10 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
; CHECK-LABEL: {{^}}phi1:
; CHECK: s_buffer_load_dword [[DST:s[0-9]]], {{s\[[0-9]+:[0-9]+\]}}, 0x0
; CHECK: v_mov_b32_e32 v{{[0-9]}}, [[DST]]
define amdgpu_ps void @phi1(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #1 {
define amdgpu_ps void @phi1(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
@ -25,13 +22,13 @@ ELSE: ; preds = %main_body
ENDIF: ; preds = %ELSE, %main_body
%temp.0 = phi float [ %tmp26, %ELSE ], [ %tmp21, %main_body ]
%tmp27 = fadd float %temp.0, %tmp23
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %tmp27, float %tmp27, float 0.000000e+00, float 1.000000e+00)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp27, float %tmp27, float 0.000000e+00, float 1.000000e+00, i1 true, i1 true) #0
ret void
}
; Make sure this program doesn't crash
; CHECK-LABEL: {{^}}phi2:
define amdgpu_ps void @phi2(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
define amdgpu_ps void @phi2(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #1 {
main_body:
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
@ -58,32 +55,32 @@ main_body:
%j.i = extractelement <2 x i32> %arg5, i32 1
%i.f.i = bitcast i32 %i.i to float
%j.f.i = bitcast i32 %j.i to float
%p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg3) #0
%p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg3) #0
%p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg3) #1
%p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg3) #1
%i.i19 = extractelement <2 x i32> %arg5, i32 0
%j.i20 = extractelement <2 x i32> %arg5, i32 1
%i.f.i21 = bitcast i32 %i.i19 to float
%j.f.i22 = bitcast i32 %j.i20 to float
%p1.i23 = call float @llvm.amdgcn.interp.p1(float %i.f.i21, i32 1, i32 0, i32 %arg3) #0
%p2.i24 = call float @llvm.amdgcn.interp.p2(float %p1.i23, float %j.f.i22, i32 1, i32 0, i32 %arg3) #0
%p1.i23 = call float @llvm.amdgcn.interp.p1(float %i.f.i21, i32 1, i32 0, i32 %arg3) #1
%p2.i24 = call float @llvm.amdgcn.interp.p2(float %p1.i23, float %j.f.i22, i32 1, i32 0, i32 %arg3) #1
%i.i13 = extractelement <2 x i32> %arg5, i32 0
%j.i14 = extractelement <2 x i32> %arg5, i32 1
%i.f.i15 = bitcast i32 %i.i13 to float
%j.f.i16 = bitcast i32 %j.i14 to float
%p1.i17 = call float @llvm.amdgcn.interp.p1(float %i.f.i15, i32 0, i32 1, i32 %arg3) #0
%p2.i18 = call float @llvm.amdgcn.interp.p2(float %p1.i17, float %j.f.i16, i32 0, i32 1, i32 %arg3) #0
%p1.i17 = call float @llvm.amdgcn.interp.p1(float %i.f.i15, i32 0, i32 1, i32 %arg3) #1
%p2.i18 = call float @llvm.amdgcn.interp.p2(float %p1.i17, float %j.f.i16, i32 0, i32 1, i32 %arg3) #1
%i.i7 = extractelement <2 x i32> %arg5, i32 0
%j.i8 = extractelement <2 x i32> %arg5, i32 1
%i.f.i9 = bitcast i32 %i.i7 to float
%j.f.i10 = bitcast i32 %j.i8 to float
%p1.i11 = call float @llvm.amdgcn.interp.p1(float %i.f.i9, i32 1, i32 1, i32 %arg3) #0
%p2.i12 = call float @llvm.amdgcn.interp.p2(float %p1.i11, float %j.f.i10, i32 1, i32 1, i32 %arg3) #0
%p1.i11 = call float @llvm.amdgcn.interp.p1(float %i.f.i9, i32 1, i32 1, i32 %arg3) #1
%p2.i12 = call float @llvm.amdgcn.interp.p2(float %p1.i11, float %j.f.i10, i32 1, i32 1, i32 %arg3) #1
%i.i1 = extractelement <2 x i32> %arg5, i32 0
%j.i2 = extractelement <2 x i32> %arg5, i32 1
%i.f.i3 = bitcast i32 %i.i1 to float
%j.f.i4 = bitcast i32 %j.i2 to float
%p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 2, i32 1, i32 %arg3) #0
%p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 2, i32 1, i32 %arg3) #0
%p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 2, i32 1, i32 %arg3) #1
%p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 2, i32 1, i32 %arg3) #1
%tmp45 = bitcast float %p2.i to i32
%tmp46 = bitcast float %p2.i24 to i32
%tmp47 = insertelement <2 x i32> undef, i32 %tmp45, i32 0
@ -168,16 +165,16 @@ ENDIF24: ; preds = %IF25, %ENDIF
%tmp111 = fsub float -0.000000e+00, %tmp105
%tmp112 = fmul float %tmp111, %tmp106
%tmp113 = call i32 @llvm.SI.packf16(float %tmp108, float %tmp110)
%tmp114 = bitcast i32 %tmp113 to float
%tmp114 = bitcast i32 %tmp113 to <2 x half>
%tmp115 = call i32 @llvm.SI.packf16(float %tmp112, float 1.000000e+00)
%tmp116 = bitcast i32 %tmp115 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp114, float %tmp116, float %tmp114, float %tmp116)
%tmp116 = bitcast i32 %tmp115 to <2 x half>
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp114, <2 x half> %tmp116, i1 true, i1 true) #0
ret void
}
; We just want to make sure the program doesn't crash
; CHECK-LABEL: {{^}}loop:
define amdgpu_ps void @loop(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #1 {
define amdgpu_ps void @loop(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
@ -204,7 +201,7 @@ LOOP: ; preds = %ENDIF, %main_body
br i1 %tmp33, label %IF, label %ENDIF
IF: ; preds = %LOOP
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %temp4.0, float %temp5.0, float %temp6.0, float 1.000000e+00)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %temp4.0, float %temp5.0, float %temp6.0, float 1.000000e+00, i1 true, i1 true) #0
ret void
ENDIF: ; preds = %LOOP
@ -230,7 +227,7 @@ ENDIF: ; preds = %LOOP
; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[SAMPLE_LO]]:[[SAMPLE_HI]]{{\]}}
; CHECK: exp
; CHECK: s_endpgm
define amdgpu_ps void @sample_v3([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #1 {
define amdgpu_ps void @sample_v3([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
entry:
%tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
%tmp21 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
@ -261,7 +258,7 @@ endif: ; preds = %else, %if
%val.0 = phi float [ %val.if.0, %if ], [ %val.else.0, %else ]
%val.1 = phi float [ %val.if.1, %if ], [ %val.else.1, %else ]
%val.2 = phi float [ %val.if.2, %if ], [ %val.else.2, %else ]
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %val.0, float %val.1, float %val.2, float 0.000000e+00)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %val.0, float %val.1, float %val.2, float 0.000000e+00, i1 true, i1 true) #0
ret void
}
@ -294,7 +291,7 @@ endif: ; preds = %if1, %if0, %entry
; This test is just checking that we don't crash / assertion fail.
; CHECK-LABEL: {{^}}copy2:
; CHECK: s_endpgm
define amdgpu_ps void @copy2([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #1 {
define amdgpu_ps void @copy2([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
entry:
br label %LOOP68
@ -308,7 +305,7 @@ LOOP68: ; preds = %ENDIF69, %entry
IF70: ; preds = %LOOP68
%q = icmp ne i32 %l, 13
%temp.8 = select i1 %q, float 1.000000e+00, float 0.000000e+00
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %temp.8, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %temp.8, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, i1 true, i1 true) #0
ret void
ENDIF69: ; preds = %LOOP68
@ -330,7 +327,7 @@ ENDIF69: ; preds = %LOOP68
; [[END]]:
; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[ADD]]{{\]}}
; CHECK: s_endpgm
define amdgpu_ps void @sample_rsrc([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #1 {
define amdgpu_ps void @sample_rsrc([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
bb:
%tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg1, i32 0, i32 0
%tmp22 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !3
@ -343,14 +340,14 @@ bb:
%j.i = extractelement <2 x i32> %arg7, i32 1
%i.f.i = bitcast i32 %i.i to float
%j.f.i = bitcast i32 %j.i to float
%p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg5) #1
%p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg5) #1
%p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg5) #0
%p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg5) #0
%i.i1 = extractelement <2 x i32> %arg7, i32 0
%j.i2 = extractelement <2 x i32> %arg7, i32 1
%i.f.i3 = bitcast i32 %i.i1 to float
%j.f.i4 = bitcast i32 %j.i2 to float
%p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 1, i32 0, i32 %arg5) #1
%p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 1, i32 0, i32 %arg5) #1
%p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 1, i32 0, i32 %arg5) #0
%p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 1, i32 0, i32 %arg5) #0
%tmp31 = bitcast float %tmp23 to i32
%tmp36 = icmp ne i32 %tmp31, 0
br i1 %tmp36, label %bb38, label %bb80
@ -377,80 +374,58 @@ bb80: ; preds = %bb
bb71: ; preds = %bb80, %bb38
%tmp72 = phi <4 x float> [ %tmp58, %bb38 ], [ %tmp87, %bb80 ]
%tmp88 = extractelement <4 x float> %tmp72, i32 0
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp88, float %tmp88, float %tmp88, float %tmp88)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp88, float %tmp88, float %tmp88, float %tmp88, i1 true, i1 true) #0
ret void
}
; Check that the resource descriptor is stored in an sgpr.
; CHECK-LABEL: {{^}}mimg_srsrc_sgpr:
; CHECK: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
define amdgpu_ps void @mimg_srsrc_sgpr([34 x <8 x i32>] addrspace(2)* byval %arg) #1 {
define amdgpu_ps void @mimg_srsrc_sgpr([34 x <8 x i32>] addrspace(2)* byval %arg) #0 {
bb:
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #1
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp7 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %arg, i32 0, i32 %tid
%tmp8 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp7, align 32, !tbaa !0
%tmp9 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 1061158912, i32 1048576000>, <8 x i32> %tmp8, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp10 = extractelement <4 x float> %tmp9, i32 0
%tmp12 = call i32 @llvm.SI.packf16(float undef, float %tmp10)
%tmp13 = bitcast i32 %tmp12 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp13, float undef, float undef, float undef)
%tmp13 = bitcast i32 %tmp12 to <2 x half>
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp13, <2 x half> undef, i1 true, i1 true) #0
ret void
}
; Check that the sampler is stored in an sgpr.
; CHECK-LABEL: {{^}}mimg_ssamp_sgpr:
; CHECK: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
define amdgpu_ps void @mimg_ssamp_sgpr([17 x <4 x i32>] addrspace(2)* byval %arg) #1 {
define amdgpu_ps void @mimg_ssamp_sgpr([17 x <4 x i32>] addrspace(2)* byval %arg) #0 {
bb:
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #1
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp7 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg, i32 0, i32 %tid
%tmp8 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp7, align 16, !tbaa !0
%tmp9 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 1061158912, i32 1048576000>, <8 x i32> undef, <4 x i32> %tmp8, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp10 = extractelement <4 x float> %tmp9, i32 0
%tmp12 = call i32 @llvm.SI.packf16(float %tmp10, float undef)
%tmp13 = bitcast i32 %tmp12 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp13, float undef, float undef, float undef)
%tmp13 = bitcast i32 %tmp12 to <2 x half>
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp13, <2 x half> undef, i1 true, i1 true) #0
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #0
declare float @llvm.fabs.f32(float) #1
declare float @llvm.amdgcn.rsq.f32(float) #1
declare float @llvm.exp2.f32(float) #1
declare float @llvm.pow.f32(float, float) #1
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #0
declare i32 @llvm.SI.packf16(float, float) #1
declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <8 x i32>, <16 x i8>, i32) #0
; Function Attrs: nounwind readnone
declare float @llvm.amdgcn.rsq.f32(float) #0
; Function Attrs: nounwind readnone
declare float @llvm.exp2.f32(float) #0
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #0
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #0
; Function Attrs: nounwind readnone
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
; Function Attrs: nounwind readnone
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
; Function Attrs: nounwind readnone
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
; Function Attrs: nounwind readnone
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #0
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
attributes #2 = { nounwind readonly }
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
!0 = !{!1, !1, i64 0, i32 1}
!1 = !{!"const", !2}

View File

@ -1,6 +1,6 @@
; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=SI %s
; XUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
; XUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare i32 @llvm.r600.read.tidig.x() #0
@ -466,4 +466,12 @@ define void @s_shl_inline_high_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 a
ret void
}
; A multiply of a scalar argument by 2 is expected to be selected as a
; scalar left shift by 1. The register patterns accept any SGPR number
; (one or more digits) so the check does not depend on which registers
; the allocator happens to pick.
; FUNC-LABEL: {{^}}test_mul2:
; GCN: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 1
define void @test_mul2(i32 %p) {
  %i = mul i32 %p, 2
  ; NOTE(review): the store is volatile, presumably so the multiply result
  ; stays live and is not optimized away -- confirm.
  store volatile i32 %i, i32 addrspace(1)* undef
  ret void
}
attributes #0 = { nounwind readnone }

View File

@ -1,14 +0,0 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; GCN-LABEL: {{^}}main:
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3f4353f8, v{{[0-9]+}}
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0xbf4353f8, v{{[0-9]+}}
define amdgpu_vs void @main(float) {
main_body:
%1 = fmul float %0, 0x3FE86A7F00000000
%2 = fmul float %0, 0xBFE86A7F00000000
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %1, float %1, float %2, float %2)
ret void
}
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

View File

@ -1,11 +1,11 @@
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
;RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; This shader has the potential to generate illegal VGPR to SGPR copies if
; the wrong register class is used for the REG_SEQUENCE instructions.
; CHECK: {{^}}main:
; CHECK: image_sample_b v{{\[[0-9]:[0-9]\]}}, v{{\[[0-9]:[0-9]\]}}, s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf
; GCN-LABEL: {{^}}main:
; GCN: image_sample_b v{{\[[0-9]:[0-9]\]}}, v{{\[[0-9]:[0-9]\]}}, s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf
define amdgpu_ps void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
@ -40,26 +40,16 @@ main_body:
%tmp37 = extractelement <4 x float> %tmp35, i32 1
%tmp38 = extractelement <4 x float> %tmp35, i32 2
%tmp39 = extractelement <4 x float> %tmp35, i32 3
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %tmp36, float %tmp37, float %tmp38, float %tmp39)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp36, float %tmp37, float %tmp38, float %tmp39, i1 true, i1 true) #0
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; Function Attrs: nounwind readnone
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
; Function Attrs: nounwind readnone
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
declare <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }

View File

@ -3,7 +3,7 @@
; The only way the subtarget knows that the si machine scheduler is being used
; is to specify -mattr=si-scheduler. If we just pass --misched=si, the backend
; won't know what scheduler we are using.
; RUN: llc -march=amdgcn -mcpu=SI --misched=si -mattr=si-scheduler < %s | FileCheck %s
; RUN: llc -march=amdgcn --misched=si -mattr=si-scheduler < %s | FileCheck %s
; The test checks the "si" machine scheduler pass works correctly.
@ -16,7 +16,7 @@
; CHECK: s_waitcnt vmcnt(0)
; CHECK: exp
; CHECK: s_endpgm
define amdgpu_ps void @main([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) {
define amdgpu_ps void @main([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
main_body:
%tmp = bitcast [34 x <8 x i32>] addrspace(2)* %arg3 to <32 x i8> addrspace(2)*
%tmp22 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp, align 32, !tbaa !0
@ -46,29 +46,22 @@ main_body:
%tmp34 = extractelement <4 x float> %tmp31, i32 2
%tmp35 = extractelement <4 x float> %tmp31, i32 3
%tmp36 = call i32 @llvm.SI.packf16(float %tmp32, float %tmp33)
%tmp37 = bitcast i32 %tmp36 to float
%tmp37 = bitcast i32 %tmp36 to <2 x half>
%tmp38 = call i32 @llvm.SI.packf16(float %tmp34, float %tmp35)
%tmp39 = bitcast i32 %tmp38 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp37, float %tmp39, float %tmp37, float %tmp39)
%tmp39 = bitcast i32 %tmp38 to <2 x half>
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp37, <2 x half> %tmp39, i1 true, i1 false) #0
ret void
}
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #0
declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; Function Attrs: nounwind readnone
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
; Function Attrs: nounwind readnone
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
!0 = !{!1, !1, i64 0, i32 1}
!1 = !{!"const", !2}

View File

@ -732,10 +732,10 @@ IF67: ; preds = %LOOP65
%tmp579 = fmul float %tmp574, %tmp45
%tmp580 = fadd float %tmp579, %tmp556
%tmp581 = call i32 @llvm.SI.packf16(float %tmp576, float %tmp578)
%tmp582 = bitcast i32 %tmp581 to float
%tmp582 = bitcast i32 %tmp581 to <2 x half>
%tmp583 = call i32 @llvm.SI.packf16(float %tmp580, float %tmp282)
%tmp584 = bitcast i32 %tmp583 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp582, float %tmp584, float %tmp582, float %tmp584)
%tmp584 = bitcast i32 %tmp583 to <2 x half>
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp582, <2 x half> %tmp584, i1 true, i1 true) #0
ret void
ENDIF66: ; preds = %LOOP65
@ -1814,10 +1814,10 @@ ENDIF209: ; preds = %ELSE214, %ELSE211,
%max.0.i1 = call float @llvm.maxnum.f32(float %tmp774, float 0.000000e+00)
%clamp.i2 = call float @llvm.minnum.f32(float %max.0.i1, float 1.000000e+00)
%tmp776 = call i32 @llvm.SI.packf16(float %tmp768, float %tmp770)
%tmp777 = bitcast i32 %tmp776 to float
%tmp777 = bitcast i32 %tmp776 to <2 x half>
%tmp778 = call i32 @llvm.SI.packf16(float %tmp772, float %clamp.i2)
%tmp779 = bitcast i32 %tmp778 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp777, float %tmp779, float %tmp777, float %tmp779)
%tmp779 = bitcast i32 %tmp778 to <2 x half>
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp777, <2 x half> %tmp779, i1 true, i1 true) #0
ret void
ELSE214: ; preds = %ELSE211
@ -1835,11 +1835,11 @@ ELSE214: ; preds = %ELSE211
declare float @llvm.exp2.f32(float) #1
declare float @llvm.ceil.f32(float) #1
declare float @llvm.amdgcn.rsq.f32(float) #1
declare float @llvm.fabs.f32(float) #1
declare float @llvm.pow.f32(float, float) #1
declare float @llvm.minnum.f32(float, float) #1
declare float @llvm.maxnum.f32(float, float) #1
declare float @llvm.amdgcn.rsq.f32(float) #1
declare float @llvm.amdgcn.cubeid(float, float, float) #1
declare float @llvm.amdgcn.cubesc(float, float, float) #1
declare float @llvm.amdgcn.cubetc(float, float, float) #1
@ -1848,13 +1848,14 @@ declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
declare <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }

View File

@ -6,270 +6,271 @@
; SI: s_or_b64 exec, exec, [[SAVED:s\[[0-9]+:[0-9]+\]|[a-z]+]]
; SI-NOT: v_readlane_b32 [[SAVED]]
define amdgpu_ps void @main() #0 {
main_body:
%0 = call float @llvm.SI.load.const(<16 x i8> undef, i32 16)
%1 = call float @llvm.SI.load.const(<16 x i8> undef, i32 32)
%2 = call float @llvm.SI.load.const(<16 x i8> undef, i32 80)
%3 = call float @llvm.SI.load.const(<16 x i8> undef, i32 84)
%4 = call float @llvm.SI.load.const(<16 x i8> undef, i32 88)
%5 = call float @llvm.SI.load.const(<16 x i8> undef, i32 96)
%6 = call float @llvm.SI.load.const(<16 x i8> undef, i32 100)
%7 = call float @llvm.SI.load.const(<16 x i8> undef, i32 104)
%8 = call float @llvm.SI.load.const(<16 x i8> undef, i32 112)
%9 = call float @llvm.SI.load.const(<16 x i8> undef, i32 116)
%10 = call float @llvm.SI.load.const(<16 x i8> undef, i32 120)
%11 = call float @llvm.SI.load.const(<16 x i8> undef, i32 128)
%12 = call float @llvm.SI.load.const(<16 x i8> undef, i32 132)
%13 = call float @llvm.SI.load.const(<16 x i8> undef, i32 136)
%14 = call float @llvm.SI.load.const(<16 x i8> undef, i32 144)
%15 = call float @llvm.SI.load.const(<16 x i8> undef, i32 148)
%16 = call float @llvm.SI.load.const(<16 x i8> undef, i32 152)
%17 = call float @llvm.SI.load.const(<16 x i8> undef, i32 160)
%18 = call float @llvm.SI.load.const(<16 x i8> undef, i32 164)
%19 = call float @llvm.SI.load.const(<16 x i8> undef, i32 168)
%20 = call float @llvm.SI.load.const(<16 x i8> undef, i32 176)
%21 = call float @llvm.SI.load.const(<16 x i8> undef, i32 180)
%22 = call float @llvm.SI.load.const(<16 x i8> undef, i32 184)
%23 = call float @llvm.SI.load.const(<16 x i8> undef, i32 192)
%24 = call float @llvm.SI.load.const(<16 x i8> undef, i32 196)
%25 = call float @llvm.SI.load.const(<16 x i8> undef, i32 200)
%26 = call float @llvm.SI.load.const(<16 x i8> undef, i32 208)
%27 = call float @llvm.SI.load.const(<16 x i8> undef, i32 212)
%28 = call float @llvm.SI.load.const(<16 x i8> undef, i32 216)
%29 = call float @llvm.SI.load.const(<16 x i8> undef, i32 224)
%30 = call float @llvm.SI.load.const(<16 x i8> undef, i32 228)
%31 = call float @llvm.SI.load.const(<16 x i8> undef, i32 232)
%32 = call float @llvm.SI.load.const(<16 x i8> undef, i32 240)
%33 = call float @llvm.SI.load.const(<16 x i8> undef, i32 244)
%34 = call float @llvm.SI.load.const(<16 x i8> undef, i32 248)
%35 = call float @llvm.SI.load.const(<16 x i8> undef, i32 256)
%36 = call float @llvm.SI.load.const(<16 x i8> undef, i32 260)
%37 = call float @llvm.SI.load.const(<16 x i8> undef, i32 264)
%38 = call float @llvm.SI.load.const(<16 x i8> undef, i32 272)
%39 = call float @llvm.SI.load.const(<16 x i8> undef, i32 276)
%40 = call float @llvm.SI.load.const(<16 x i8> undef, i32 280)
%41 = call float @llvm.SI.load.const(<16 x i8> undef, i32 288)
%42 = call float @llvm.SI.load.const(<16 x i8> undef, i32 292)
%43 = call float @llvm.SI.load.const(<16 x i8> undef, i32 296)
%44 = call float @llvm.SI.load.const(<16 x i8> undef, i32 304)
%45 = call float @llvm.SI.load.const(<16 x i8> undef, i32 308)
%46 = call float @llvm.SI.load.const(<16 x i8> undef, i32 312)
%47 = call float @llvm.SI.load.const(<16 x i8> undef, i32 320)
%48 = call float @llvm.SI.load.const(<16 x i8> undef, i32 324)
%49 = call float @llvm.SI.load.const(<16 x i8> undef, i32 328)
%50 = call float @llvm.SI.load.const(<16 x i8> undef, i32 336)
%51 = call float @llvm.SI.load.const(<16 x i8> undef, i32 340)
%52 = call float @llvm.SI.load.const(<16 x i8> undef, i32 344)
%53 = call float @llvm.SI.load.const(<16 x i8> undef, i32 352)
%54 = call float @llvm.SI.load.const(<16 x i8> undef, i32 356)
%55 = call float @llvm.SI.load.const(<16 x i8> undef, i32 360)
%56 = call float @llvm.SI.load.const(<16 x i8> undef, i32 368)
%57 = call float @llvm.SI.load.const(<16 x i8> undef, i32 372)
%58 = call float @llvm.SI.load.const(<16 x i8> undef, i32 376)
%59 = call float @llvm.SI.load.const(<16 x i8> undef, i32 384)
%60 = call float @llvm.SI.load.const(<16 x i8> undef, i32 388)
%61 = call float @llvm.SI.load.const(<16 x i8> undef, i32 392)
%62 = call float @llvm.SI.load.const(<16 x i8> undef, i32 400)
%63 = call float @llvm.SI.load.const(<16 x i8> undef, i32 404)
%64 = call float @llvm.SI.load.const(<16 x i8> undef, i32 408)
%65 = call float @llvm.SI.load.const(<16 x i8> undef, i32 416)
%66 = call float @llvm.SI.load.const(<16 x i8> undef, i32 420)
%tmp = call float @llvm.SI.load.const(<16 x i8> undef, i32 16)
%tmp1 = call float @llvm.SI.load.const(<16 x i8> undef, i32 32)
%tmp2 = call float @llvm.SI.load.const(<16 x i8> undef, i32 80)
%tmp3 = call float @llvm.SI.load.const(<16 x i8> undef, i32 84)
%tmp4 = call float @llvm.SI.load.const(<16 x i8> undef, i32 88)
%tmp5 = call float @llvm.SI.load.const(<16 x i8> undef, i32 96)
%tmp6 = call float @llvm.SI.load.const(<16 x i8> undef, i32 100)
%tmp7 = call float @llvm.SI.load.const(<16 x i8> undef, i32 104)
%tmp8 = call float @llvm.SI.load.const(<16 x i8> undef, i32 112)
%tmp9 = call float @llvm.SI.load.const(<16 x i8> undef, i32 116)
%tmp10 = call float @llvm.SI.load.const(<16 x i8> undef, i32 120)
%tmp11 = call float @llvm.SI.load.const(<16 x i8> undef, i32 128)
%tmp12 = call float @llvm.SI.load.const(<16 x i8> undef, i32 132)
%tmp13 = call float @llvm.SI.load.const(<16 x i8> undef, i32 136)
%tmp14 = call float @llvm.SI.load.const(<16 x i8> undef, i32 144)
%tmp15 = call float @llvm.SI.load.const(<16 x i8> undef, i32 148)
%tmp16 = call float @llvm.SI.load.const(<16 x i8> undef, i32 152)
%tmp17 = call float @llvm.SI.load.const(<16 x i8> undef, i32 160)
%tmp18 = call float @llvm.SI.load.const(<16 x i8> undef, i32 164)
%tmp19 = call float @llvm.SI.load.const(<16 x i8> undef, i32 168)
%tmp20 = call float @llvm.SI.load.const(<16 x i8> undef, i32 176)
%tmp21 = call float @llvm.SI.load.const(<16 x i8> undef, i32 180)
%tmp22 = call float @llvm.SI.load.const(<16 x i8> undef, i32 184)
%tmp23 = call float @llvm.SI.load.const(<16 x i8> undef, i32 192)
%tmp24 = call float @llvm.SI.load.const(<16 x i8> undef, i32 196)
%tmp25 = call float @llvm.SI.load.const(<16 x i8> undef, i32 200)
%tmp26 = call float @llvm.SI.load.const(<16 x i8> undef, i32 208)
%tmp27 = call float @llvm.SI.load.const(<16 x i8> undef, i32 212)
%tmp28 = call float @llvm.SI.load.const(<16 x i8> undef, i32 216)
%tmp29 = call float @llvm.SI.load.const(<16 x i8> undef, i32 224)
%tmp30 = call float @llvm.SI.load.const(<16 x i8> undef, i32 228)
%tmp31 = call float @llvm.SI.load.const(<16 x i8> undef, i32 232)
%tmp32 = call float @llvm.SI.load.const(<16 x i8> undef, i32 240)
%tmp33 = call float @llvm.SI.load.const(<16 x i8> undef, i32 244)
%tmp34 = call float @llvm.SI.load.const(<16 x i8> undef, i32 248)
%tmp35 = call float @llvm.SI.load.const(<16 x i8> undef, i32 256)
%tmp36 = call float @llvm.SI.load.const(<16 x i8> undef, i32 260)
%tmp37 = call float @llvm.SI.load.const(<16 x i8> undef, i32 264)
%tmp38 = call float @llvm.SI.load.const(<16 x i8> undef, i32 272)
%tmp39 = call float @llvm.SI.load.const(<16 x i8> undef, i32 276)
%tmp40 = call float @llvm.SI.load.const(<16 x i8> undef, i32 280)
%tmp41 = call float @llvm.SI.load.const(<16 x i8> undef, i32 288)
%tmp42 = call float @llvm.SI.load.const(<16 x i8> undef, i32 292)
%tmp43 = call float @llvm.SI.load.const(<16 x i8> undef, i32 296)
%tmp44 = call float @llvm.SI.load.const(<16 x i8> undef, i32 304)
%tmp45 = call float @llvm.SI.load.const(<16 x i8> undef, i32 308)
%tmp46 = call float @llvm.SI.load.const(<16 x i8> undef, i32 312)
%tmp47 = call float @llvm.SI.load.const(<16 x i8> undef, i32 320)
%tmp48 = call float @llvm.SI.load.const(<16 x i8> undef, i32 324)
%tmp49 = call float @llvm.SI.load.const(<16 x i8> undef, i32 328)
%tmp50 = call float @llvm.SI.load.const(<16 x i8> undef, i32 336)
%tmp51 = call float @llvm.SI.load.const(<16 x i8> undef, i32 340)
%tmp52 = call float @llvm.SI.load.const(<16 x i8> undef, i32 344)
%tmp53 = call float @llvm.SI.load.const(<16 x i8> undef, i32 352)
%tmp54 = call float @llvm.SI.load.const(<16 x i8> undef, i32 356)
%tmp55 = call float @llvm.SI.load.const(<16 x i8> undef, i32 360)
%tmp56 = call float @llvm.SI.load.const(<16 x i8> undef, i32 368)
%tmp57 = call float @llvm.SI.load.const(<16 x i8> undef, i32 372)
%tmp58 = call float @llvm.SI.load.const(<16 x i8> undef, i32 376)
%tmp59 = call float @llvm.SI.load.const(<16 x i8> undef, i32 384)
%tmp60 = call float @llvm.SI.load.const(<16 x i8> undef, i32 388)
%tmp61 = call float @llvm.SI.load.const(<16 x i8> undef, i32 392)
%tmp62 = call float @llvm.SI.load.const(<16 x i8> undef, i32 400)
%tmp63 = call float @llvm.SI.load.const(<16 x i8> undef, i32 404)
%tmp64 = call float @llvm.SI.load.const(<16 x i8> undef, i32 408)
%tmp65 = call float @llvm.SI.load.const(<16 x i8> undef, i32 416)
%tmp66 = call float @llvm.SI.load.const(<16 x i8> undef, i32 420)
br label %LOOP
LOOP: ; preds = %ENDIF2795, %main_body
%temp894.0 = phi float [ 0.000000e+00, %main_body ], [ %temp894.1, %ENDIF2795 ]
%temp18.0 = phi float [ undef, %main_body ], [ %temp18.1, %ENDIF2795 ]
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
%67 = icmp sgt i32 %tid, 4
br i1 %67, label %ENDLOOP, label %ENDIF
%tmp67 = icmp sgt i32 %tid, 4
br i1 %tmp67, label %ENDLOOP, label %ENDIF
ENDLOOP: ; preds = %ELSE2566, %LOOP
%one.sub.a.i = fsub float 1.000000e+00, %0
%one.sub.a.i = fsub float 1.000000e+00, %tmp
%one.sub.ac.i = fmul float %one.sub.a.i, undef
%result.i = fadd float fmul (float undef, float undef), %one.sub.ac.i
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float undef, float %result.i, float undef, float 1.000000e+00)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float undef, float %result.i, float undef, float 1.000000e+00, i1 true, i1 true) #0
ret void
ENDIF: ; preds = %LOOP
%68 = fsub float %2, undef
%69 = fsub float %3, undef
%70 = fsub float %4, undef
%71 = fmul float %68, 0.000000e+00
%72 = fmul float %69, undef
%73 = fmul float %70, undef
%74 = fsub float %6, undef
%75 = fsub float %7, undef
%76 = fmul float %74, undef
%77 = fmul float %75, 0.000000e+00
%78 = call float @llvm.minnum.f32(float %73, float %77)
%79 = call float @llvm.maxnum.f32(float %71, float 0.000000e+00)
%80 = call float @llvm.maxnum.f32(float %72, float %76)
%81 = call float @llvm.maxnum.f32(float undef, float %78)
%82 = call float @llvm.minnum.f32(float %79, float %80)
%83 = call float @llvm.minnum.f32(float %82, float undef)
%84 = fsub float %14, undef
%85 = fsub float %15, undef
%86 = fsub float %16, undef
%87 = fmul float %84, undef
%88 = fmul float %85, undef
%89 = fmul float %86, undef
%90 = fsub float %17, undef
%91 = fsub float %18, undef
%92 = fsub float %19, undef
%93 = fmul float %90, 0.000000e+00
%94 = fmul float %91, undef
%95 = fmul float %92, undef
%96 = call float @llvm.minnum.f32(float %88, float %94)
%97 = call float @llvm.maxnum.f32(float %87, float %93)
%98 = call float @llvm.maxnum.f32(float %89, float %95)
%99 = call float @llvm.maxnum.f32(float undef, float %96)
%100 = call float @llvm.maxnum.f32(float %99, float undef)
%101 = call float @llvm.minnum.f32(float %97, float undef)
%102 = call float @llvm.minnum.f32(float %101, float %98)
%103 = fsub float %30, undef
%104 = fsub float %31, undef
%105 = fmul float %103, 0.000000e+00
%106 = fmul float %104, 0.000000e+00
%107 = call float @llvm.minnum.f32(float undef, float %105)
%108 = call float @llvm.maxnum.f32(float undef, float %106)
%109 = call float @llvm.maxnum.f32(float undef, float %107)
%110 = call float @llvm.maxnum.f32(float %109, float undef)
%111 = call float @llvm.minnum.f32(float undef, float %108)
%112 = fsub float %32, undef
%113 = fsub float %33, undef
%114 = fsub float %34, undef
%115 = fmul float %112, 0.000000e+00
%116 = fmul float %113, undef
%117 = fmul float %114, undef
%118 = fsub float %35, undef
%119 = fsub float %36, undef
%120 = fsub float %37, undef
%121 = fmul float %118, undef
%122 = fmul float %119, undef
%123 = fmul float %120, undef
%124 = call float @llvm.minnum.f32(float %115, float %121)
%125 = call float @llvm.minnum.f32(float %116, float %122)
%126 = call float @llvm.minnum.f32(float %117, float %123)
%127 = call float @llvm.maxnum.f32(float %124, float %125)
%128 = call float @llvm.maxnum.f32(float %127, float %126)
%129 = fsub float %38, undef
%130 = fsub float %39, undef
%131 = fsub float %40, undef
%132 = fmul float %129, 0.000000e+00
%133 = fmul float %130, undef
%134 = fmul float %131, undef
%135 = fsub float %41, undef
%136 = fsub float %42, undef
%137 = fsub float %43, undef
%138 = fmul float %135, undef
%139 = fmul float %136, undef
%140 = fmul float %137, undef
%141 = call float @llvm.minnum.f32(float %132, float %138)
%142 = call float @llvm.minnum.f32(float %133, float %139)
%143 = call float @llvm.minnum.f32(float %134, float %140)
%144 = call float @llvm.maxnum.f32(float %141, float %142)
%145 = call float @llvm.maxnum.f32(float %144, float %143)
%146 = fsub float %44, undef
%147 = fsub float %45, undef
%148 = fsub float %46, undef
%149 = fmul float %146, 0.000000e+00
%150 = fmul float %147, 0.000000e+00
%151 = fmul float %148, undef
%152 = fsub float %47, undef
%153 = fsub float %48, undef
%154 = fsub float %49, undef
%155 = fmul float %152, undef
%156 = fmul float %153, 0.000000e+00
%157 = fmul float %154, undef
%158 = call float @llvm.minnum.f32(float %149, float %155)
%159 = call float @llvm.minnum.f32(float %150, float %156)
%160 = call float @llvm.minnum.f32(float %151, float %157)
%161 = call float @llvm.maxnum.f32(float %158, float %159)
%162 = call float @llvm.maxnum.f32(float %161, float %160)
%163 = fsub float %50, undef
%164 = fsub float %51, undef
%165 = fsub float %52, undef
%166 = fmul float %163, undef
%167 = fmul float %164, 0.000000e+00
%168 = fmul float %165, 0.000000e+00
%169 = fsub float %53, undef
%170 = fsub float %54, undef
%171 = fsub float %55, undef
%172 = fdiv float 1.000000e+00, %temp18.0
%173 = fmul float %169, undef
%174 = fmul float %170, undef
%175 = fmul float %171, %172
%176 = call float @llvm.minnum.f32(float %166, float %173)
%177 = call float @llvm.minnum.f32(float %167, float %174)
%178 = call float @llvm.minnum.f32(float %168, float %175)
%179 = call float @llvm.maxnum.f32(float %176, float %177)
%180 = call float @llvm.maxnum.f32(float %179, float %178)
%181 = fsub float %62, undef
%182 = fsub float %63, undef
%183 = fsub float %64, undef
%184 = fmul float %181, 0.000000e+00
%185 = fmul float %182, undef
%186 = fmul float %183, undef
%187 = fsub float %65, undef
%188 = fsub float %66, undef
%189 = fmul float %187, undef
%190 = fmul float %188, undef
%191 = call float @llvm.maxnum.f32(float %184, float %189)
%192 = call float @llvm.maxnum.f32(float %185, float %190)
%193 = call float @llvm.maxnum.f32(float %186, float undef)
%194 = call float @llvm.minnum.f32(float %191, float %192)
%195 = call float @llvm.minnum.f32(float %194, float %193)
%.temp292.7 = select i1 undef, float %162, float undef
%temp292.9 = select i1 false, float %180, float %.temp292.7
%tmp68 = fsub float %tmp2, undef
%tmp69 = fsub float %tmp3, undef
%tmp70 = fsub float %tmp4, undef
%tmp71 = fmul float %tmp68, 0.000000e+00
%tmp72 = fmul float %tmp69, undef
%tmp73 = fmul float %tmp70, undef
%tmp74 = fsub float %tmp6, undef
%tmp75 = fsub float %tmp7, undef
%tmp76 = fmul float %tmp74, undef
%tmp77 = fmul float %tmp75, 0.000000e+00
%tmp78 = call float @llvm.minnum.f32(float %tmp73, float %tmp77)
%tmp79 = call float @llvm.maxnum.f32(float %tmp71, float 0.000000e+00)
%tmp80 = call float @llvm.maxnum.f32(float %tmp72, float %tmp76)
%tmp81 = call float @llvm.maxnum.f32(float undef, float %tmp78)
%tmp82 = call float @llvm.minnum.f32(float %tmp79, float %tmp80)
%tmp83 = call float @llvm.minnum.f32(float %tmp82, float undef)
%tmp84 = fsub float %tmp14, undef
%tmp85 = fsub float %tmp15, undef
%tmp86 = fsub float %tmp16, undef
%tmp87 = fmul float %tmp84, undef
%tmp88 = fmul float %tmp85, undef
%tmp89 = fmul float %tmp86, undef
%tmp90 = fsub float %tmp17, undef
%tmp91 = fsub float %tmp18, undef
%tmp92 = fsub float %tmp19, undef
%tmp93 = fmul float %tmp90, 0.000000e+00
%tmp94 = fmul float %tmp91, undef
%tmp95 = fmul float %tmp92, undef
%tmp96 = call float @llvm.minnum.f32(float %tmp88, float %tmp94)
%tmp97 = call float @llvm.maxnum.f32(float %tmp87, float %tmp93)
%tmp98 = call float @llvm.maxnum.f32(float %tmp89, float %tmp95)
%tmp99 = call float @llvm.maxnum.f32(float undef, float %tmp96)
%tmp100 = call float @llvm.maxnum.f32(float %tmp99, float undef)
%tmp101 = call float @llvm.minnum.f32(float %tmp97, float undef)
%tmp102 = call float @llvm.minnum.f32(float %tmp101, float %tmp98)
%tmp103 = fsub float %tmp30, undef
%tmp104 = fsub float %tmp31, undef
%tmp105 = fmul float %tmp103, 0.000000e+00
%tmp106 = fmul float %tmp104, 0.000000e+00
%tmp107 = call float @llvm.minnum.f32(float undef, float %tmp105)
%tmp108 = call float @llvm.maxnum.f32(float undef, float %tmp106)
%tmp109 = call float @llvm.maxnum.f32(float undef, float %tmp107)
%tmp110 = call float @llvm.maxnum.f32(float %tmp109, float undef)
%tmp111 = call float @llvm.minnum.f32(float undef, float %tmp108)
%tmp112 = fsub float %tmp32, undef
%tmp113 = fsub float %tmp33, undef
%tmp114 = fsub float %tmp34, undef
%tmp115 = fmul float %tmp112, 0.000000e+00
%tmp116 = fmul float %tmp113, undef
%tmp117 = fmul float %tmp114, undef
%tmp118 = fsub float %tmp35, undef
%tmp119 = fsub float %tmp36, undef
%tmp120 = fsub float %tmp37, undef
%tmp121 = fmul float %tmp118, undef
%tmp122 = fmul float %tmp119, undef
%tmp123 = fmul float %tmp120, undef
%tmp124 = call float @llvm.minnum.f32(float %tmp115, float %tmp121)
%tmp125 = call float @llvm.minnum.f32(float %tmp116, float %tmp122)
%tmp126 = call float @llvm.minnum.f32(float %tmp117, float %tmp123)
%tmp127 = call float @llvm.maxnum.f32(float %tmp124, float %tmp125)
%tmp128 = call float @llvm.maxnum.f32(float %tmp127, float %tmp126)
%tmp129 = fsub float %tmp38, undef
%tmp130 = fsub float %tmp39, undef
%tmp131 = fsub float %tmp40, undef
%tmp132 = fmul float %tmp129, 0.000000e+00
%tmp133 = fmul float %tmp130, undef
%tmp134 = fmul float %tmp131, undef
%tmp135 = fsub float %tmp41, undef
%tmp136 = fsub float %tmp42, undef
%tmp137 = fsub float %tmp43, undef
%tmp138 = fmul float %tmp135, undef
%tmp139 = fmul float %tmp136, undef
%tmp140 = fmul float %tmp137, undef
%tmp141 = call float @llvm.minnum.f32(float %tmp132, float %tmp138)
%tmp142 = call float @llvm.minnum.f32(float %tmp133, float %tmp139)
%tmp143 = call float @llvm.minnum.f32(float %tmp134, float %tmp140)
%tmp144 = call float @llvm.maxnum.f32(float %tmp141, float %tmp142)
%tmp145 = call float @llvm.maxnum.f32(float %tmp144, float %tmp143)
%tmp146 = fsub float %tmp44, undef
%tmp147 = fsub float %tmp45, undef
%tmp148 = fsub float %tmp46, undef
%tmp149 = fmul float %tmp146, 0.000000e+00
%tmp150 = fmul float %tmp147, 0.000000e+00
%tmp151 = fmul float %tmp148, undef
%tmp152 = fsub float %tmp47, undef
%tmp153 = fsub float %tmp48, undef
%tmp154 = fsub float %tmp49, undef
%tmp155 = fmul float %tmp152, undef
%tmp156 = fmul float %tmp153, 0.000000e+00
%tmp157 = fmul float %tmp154, undef
%tmp158 = call float @llvm.minnum.f32(float %tmp149, float %tmp155)
%tmp159 = call float @llvm.minnum.f32(float %tmp150, float %tmp156)
%tmp160 = call float @llvm.minnum.f32(float %tmp151, float %tmp157)
%tmp161 = call float @llvm.maxnum.f32(float %tmp158, float %tmp159)
%tmp162 = call float @llvm.maxnum.f32(float %tmp161, float %tmp160)
%tmp163 = fsub float %tmp50, undef
%tmp164 = fsub float %tmp51, undef
%tmp165 = fsub float %tmp52, undef
%tmp166 = fmul float %tmp163, undef
%tmp167 = fmul float %tmp164, 0.000000e+00
%tmp168 = fmul float %tmp165, 0.000000e+00
%tmp169 = fsub float %tmp53, undef
%tmp170 = fsub float %tmp54, undef
%tmp171 = fsub float %tmp55, undef
%tmp172 = fdiv float 1.000000e+00, %temp18.0
%tmp173 = fmul float %tmp169, undef
%tmp174 = fmul float %tmp170, undef
%tmp175 = fmul float %tmp171, %tmp172
%tmp176 = call float @llvm.minnum.f32(float %tmp166, float %tmp173)
%tmp177 = call float @llvm.minnum.f32(float %tmp167, float %tmp174)
%tmp178 = call float @llvm.minnum.f32(float %tmp168, float %tmp175)
%tmp179 = call float @llvm.maxnum.f32(float %tmp176, float %tmp177)
%tmp180 = call float @llvm.maxnum.f32(float %tmp179, float %tmp178)
%tmp181 = fsub float %tmp62, undef
%tmp182 = fsub float %tmp63, undef
%tmp183 = fsub float %tmp64, undef
%tmp184 = fmul float %tmp181, 0.000000e+00
%tmp185 = fmul float %tmp182, undef
%tmp186 = fmul float %tmp183, undef
%tmp187 = fsub float %tmp65, undef
%tmp188 = fsub float %tmp66, undef
%tmp189 = fmul float %tmp187, undef
%tmp190 = fmul float %tmp188, undef
%tmp191 = call float @llvm.maxnum.f32(float %tmp184, float %tmp189)
%tmp192 = call float @llvm.maxnum.f32(float %tmp185, float %tmp190)
%tmp193 = call float @llvm.maxnum.f32(float %tmp186, float undef)
%tmp194 = call float @llvm.minnum.f32(float %tmp191, float %tmp192)
%tmp195 = call float @llvm.minnum.f32(float %tmp194, float %tmp193)
%.temp292.7 = select i1 undef, float %tmp162, float undef
%temp292.9 = select i1 false, float %tmp180, float %.temp292.7
%.temp292.9 = select i1 undef, float undef, float %temp292.9
%196 = fcmp ogt float undef, 0.000000e+00
%197 = fcmp olt float undef, %195
%198 = and i1 %196, %197
%199 = fcmp olt float undef, %.temp292.9
%200 = and i1 %198, %199
%temp292.11 = select i1 %200, float undef, float %.temp292.9
%tid0 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #2
%tmp196 = fcmp ogt float undef, 0.000000e+00
%tmp197 = fcmp olt float undef, %tmp195
%tmp198 = and i1 %tmp196, %tmp197
%tmp199 = fcmp olt float undef, %.temp292.9
%tmp200 = and i1 %tmp198, %tmp199
%temp292.11 = select i1 %tmp200, float undef, float %.temp292.9
%tid0 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
%cmp0 = icmp eq i32 %tid0, 0
br i1 %cmp0, label %IF2565, label %ELSE2566
IF2565: ; preds = %ENDIF
%tid1 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #2
%tid1 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
%cmp1 = icmp eq i32 %tid1, 0
br i1 %cmp1, label %ENDIF2582, label %ELSE2584
ELSE2566: ; preds = %ENDIF
%tid2 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #2
%tid2 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
%tidf = bitcast i32 %tid2 to float
%201 = fcmp oeq float %temp292.11, %tidf
br i1 %201, label %ENDLOOP, label %ELSE2593
%tmp201 = fcmp oeq float %temp292.11, %tidf
br i1 %tmp201, label %ENDLOOP, label %ELSE2593
ENDIF2564: ; preds = %ENDIF2594, %ENDIF2588
%temp894.1 = phi float [ undef, %ENDIF2588 ], [ %temp894.2, %ENDIF2594 ]
%temp18.1 = phi float [ %218, %ENDIF2588 ], [ undef, %ENDIF2594 ]
%202 = fsub float %5, undef
%203 = fmul float %202, undef
%204 = call float @llvm.maxnum.f32(float undef, float %203)
%205 = call float @llvm.minnum.f32(float %204, float undef)
%206 = call float @llvm.minnum.f32(float %205, float undef)
%207 = fcmp ogt float undef, 0.000000e+00
%208 = fcmp olt float undef, 1.000000e+00
%209 = and i1 %207, %208
%tid3 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #2
%temp18.1 = phi float [ %tmp218, %ENDIF2588 ], [ undef, %ENDIF2594 ]
%tmp202 = fsub float %tmp5, undef
%tmp203 = fmul float %tmp202, undef
%tmp204 = call float @llvm.maxnum.f32(float undef, float %tmp203)
%tmp205 = call float @llvm.minnum.f32(float %tmp204, float undef)
%tmp206 = call float @llvm.minnum.f32(float %tmp205, float undef)
%tmp207 = fcmp ogt float undef, 0.000000e+00
%tmp208 = fcmp olt float undef, 1.000000e+00
%tmp209 = and i1 %tmp207, %tmp208
%tid3 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
%tidf3 = bitcast i32 %tid3 to float
%210 = fcmp olt float %tidf3, %206
%211 = and i1 %209, %210
br i1 %211, label %ENDIF2795, label %ELSE2797
%tmp210 = fcmp olt float %tidf3, %tmp206
%tmp211 = and i1 %tmp209, %tmp210
br i1 %tmp211, label %ENDIF2795, label %ELSE2797
ELSE2584: ; preds = %IF2565
br label %ENDIF2582
ENDIF2582: ; preds = %ELSE2584, %IF2565
%212 = fadd float %1, undef
%213 = fadd float 0.000000e+00, %212
%floor = call float @llvm.floor.f32(float %213)
%214 = fsub float %213, %floor
%tid4 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #2
%tmp212 = fadd float %tmp1, undef
%tmp213 = fadd float 0.000000e+00, %tmp212
%floor = call float @llvm.floor.f32(float %tmp213)
%tmp214 = fsub float %tmp213, %floor
%tid4 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
%cmp4 = icmp eq i32 %tid4, 0
br i1 %cmp4, label %IF2589, label %ELSE2590
@ -280,61 +281,61 @@ ELSE2590: ; preds = %ENDIF2582
br label %ENDIF2588
ENDIF2588: ; preds = %ELSE2590, %IF2589
%215 = fsub float 1.000000e+00, %214
%216 = call float @llvm.sqrt.f32(float %215)
%217 = fmul float %216, undef
%218 = fadd float %217, undef
%tmp215 = fsub float 1.000000e+00, %tmp214
%tmp216 = call float @llvm.sqrt.f32(float %tmp215)
%tmp217 = fmul float %tmp216, undef
%tmp218 = fadd float %tmp217, undef
br label %ENDIF2564
ELSE2593: ; preds = %ELSE2566
%219 = fcmp oeq float %temp292.11, %81
%220 = fcmp olt float %81, %83
%221 = and i1 %219, %220
br i1 %221, label %ENDIF2594, label %ELSE2596
%tmp219 = fcmp oeq float %temp292.11, %tmp81
%tmp220 = fcmp olt float %tmp81, %tmp83
%tmp221 = and i1 %tmp219, %tmp220
br i1 %tmp221, label %ENDIF2594, label %ELSE2596
ELSE2596: ; preds = %ELSE2593
%222 = fcmp oeq float %temp292.11, %100
%223 = fcmp olt float %100, %102
%224 = and i1 %222, %223
br i1 %224, label %ENDIF2594, label %ELSE2632
%tmp222 = fcmp oeq float %temp292.11, %tmp100
%tmp223 = fcmp olt float %tmp100, %tmp102
%tmp224 = and i1 %tmp222, %tmp223
br i1 %tmp224, label %ENDIF2594, label %ELSE2632
ENDIF2594: ; preds = %ELSE2788, %ELSE2785, %ELSE2782, %ELSE2779, %IF2775, %ELSE2761, %ELSE2758, %IF2757, %ELSE2704, %ELSE2686, %ELSE2671, %ELSE2668, %IF2667, %ELSE2632, %ELSE2596, %ELSE2593
%temp894.2 = phi float [ 0.000000e+00, %IF2667 ], [ 0.000000e+00, %ELSE2671 ], [ 0.000000e+00, %IF2757 ], [ 0.000000e+00, %ELSE2761 ], [ %temp894.0, %ELSE2758 ], [ 0.000000e+00, %IF2775 ], [ 0.000000e+00, %ELSE2779 ], [ 0.000000e+00, %ELSE2782 ], [ %.2848, %ELSE2788 ], [ 0.000000e+00, %ELSE2785 ], [ 0.000000e+00, %ELSE2593 ], [ 0.000000e+00, %ELSE2632 ], [ 0.000000e+00, %ELSE2704 ], [ 0.000000e+00, %ELSE2686 ], [ 0.000000e+00, %ELSE2668 ], [ 0.000000e+00, %ELSE2596 ]
%225 = fmul float %temp894.2, undef
%tmp225 = fmul float %temp894.2, undef
br label %ENDIF2564
ELSE2632: ; preds = %ELSE2596
br i1 undef, label %ENDIF2594, label %ELSE2650
ELSE2650: ; preds = %ELSE2632
%226 = fcmp oeq float %temp292.11, %110
%227 = fcmp olt float %110, %111
%228 = and i1 %226, %227
br i1 %228, label %IF2667, label %ELSE2668
%tmp226 = fcmp oeq float %temp292.11, %tmp110
%tmp227 = fcmp olt float %tmp110, %tmp111
%tmp228 = and i1 %tmp226, %tmp227
br i1 %tmp228, label %IF2667, label %ELSE2668
IF2667: ; preds = %ELSE2650
br i1 undef, label %ENDIF2594, label %ELSE2671
ELSE2668: ; preds = %ELSE2650
%229 = fcmp oeq float %temp292.11, %128
%230 = fcmp olt float %128, undef
%231 = and i1 %229, %230
br i1 %231, label %ENDIF2594, label %ELSE2686
%tmp229 = fcmp oeq float %temp292.11, %tmp128
%tmp230 = fcmp olt float %tmp128, undef
%tmp231 = and i1 %tmp229, %tmp230
br i1 %tmp231, label %ENDIF2594, label %ELSE2686
ELSE2671: ; preds = %IF2667
br label %ENDIF2594
ELSE2686: ; preds = %ELSE2668
%232 = fcmp oeq float %temp292.11, %145
%233 = fcmp olt float %145, undef
%234 = and i1 %232, %233
br i1 %234, label %ENDIF2594, label %ELSE2704
%tmp232 = fcmp oeq float %temp292.11, %tmp145
%tmp233 = fcmp olt float %tmp145, undef
%tmp234 = and i1 %tmp232, %tmp233
br i1 %tmp234, label %ENDIF2594, label %ELSE2704
ELSE2704: ; preds = %ELSE2686
%235 = fcmp oeq float %temp292.11, %180
%236 = fcmp olt float %180, undef
%237 = and i1 %235, %236
br i1 %237, label %ENDIF2594, label %ELSE2740
%tmp235 = fcmp oeq float %temp292.11, %tmp180
%tmp236 = fcmp olt float %tmp180, undef
%tmp237 = and i1 %tmp235, %tmp236
br i1 %tmp237, label %ENDIF2594, label %ELSE2740
ELSE2740: ; preds = %ELSE2704
br i1 undef, label %IF2757, label %ELSE2758
@ -349,8 +350,8 @@ ELSE2761: ; preds = %IF2757
br label %ENDIF2594
IF2775: ; preds = %ELSE2758
%238 = fcmp olt float undef, undef
br i1 %238, label %ENDIF2594, label %ELSE2779
%tmp238 = fcmp olt float undef, undef
br i1 %tmp238, label %ENDIF2594, label %ELSE2779
ELSE2779: ; preds = %IF2775
br i1 undef, label %ENDIF2594, label %ELSE2782
@ -359,39 +360,39 @@ ELSE2782: ; preds = %ELSE2779
br i1 undef, label %ENDIF2594, label %ELSE2785
ELSE2785: ; preds = %ELSE2782
%239 = fcmp olt float undef, 0.000000e+00
br i1 %239, label %ENDIF2594, label %ELSE2788
%tmp239 = fcmp olt float undef, 0.000000e+00
br i1 %tmp239, label %ENDIF2594, label %ELSE2788
ELSE2788: ; preds = %ELSE2785
%240 = fcmp olt float 0.000000e+00, undef
%.2848 = select i1 %240, float -1.000000e+00, float 1.000000e+00
%tmp240 = fcmp olt float 0.000000e+00, undef
%.2848 = select i1 %tmp240, float -1.000000e+00, float 1.000000e+00
br label %ENDIF2594
ELSE2797: ; preds = %ENDIF2564
%241 = fsub float %8, undef
%242 = fsub float %9, undef
%243 = fsub float %10, undef
%244 = fmul float %241, undef
%245 = fmul float %242, undef
%246 = fmul float %243, undef
%247 = fsub float %11, undef
%248 = fsub float %12, undef
%249 = fsub float %13, undef
%250 = fmul float %247, undef
%251 = fmul float %248, undef
%252 = fmul float %249, undef
%253 = call float @llvm.minnum.f32(float %244, float %250)
%254 = call float @llvm.minnum.f32(float %245, float %251)
%255 = call float @llvm.maxnum.f32(float %246, float %252)
%256 = call float @llvm.maxnum.f32(float %253, float %254)
%257 = call float @llvm.maxnum.f32(float %256, float undef)
%258 = call float @llvm.minnum.f32(float undef, float %255)
%259 = fcmp ogt float %257, 0.000000e+00
%260 = fcmp olt float %257, 1.000000e+00
%261 = and i1 %259, %260
%262 = fcmp olt float %257, %258
%263 = and i1 %261, %262
br i1 %263, label %ENDIF2795, label %ELSE2800
%tmp241 = fsub float %tmp8, undef
%tmp242 = fsub float %tmp9, undef
%tmp243 = fsub float %tmp10, undef
%tmp244 = fmul float %tmp241, undef
%tmp245 = fmul float %tmp242, undef
%tmp246 = fmul float %tmp243, undef
%tmp247 = fsub float %tmp11, undef
%tmp248 = fsub float %tmp12, undef
%tmp249 = fsub float %tmp13, undef
%tmp250 = fmul float %tmp247, undef
%tmp251 = fmul float %tmp248, undef
%tmp252 = fmul float %tmp249, undef
%tmp253 = call float @llvm.minnum.f32(float %tmp244, float %tmp250)
%tmp254 = call float @llvm.minnum.f32(float %tmp245, float %tmp251)
%tmp255 = call float @llvm.maxnum.f32(float %tmp246, float %tmp252)
%tmp256 = call float @llvm.maxnum.f32(float %tmp253, float %tmp254)
%tmp257 = call float @llvm.maxnum.f32(float %tmp256, float undef)
%tmp258 = call float @llvm.minnum.f32(float undef, float %tmp255)
%tmp259 = fcmp ogt float %tmp257, 0.000000e+00
%tmp260 = fcmp olt float %tmp257, 1.000000e+00
%tmp261 = and i1 %tmp259, %tmp260
%tmp262 = fcmp olt float %tmp257, %tmp258
%tmp263 = and i1 %tmp261, %tmp262
br i1 %tmp263, label %ENDIF2795, label %ELSE2800
ENDIF2795: ; preds = %ELSE2824, %ELSE2821, %ELSE2818, %ELSE2815, %ELSE2812, %ELSE2809, %ELSE2806, %ELSE2803, %ELSE2800, %ELSE2797, %ENDIF2564
br label %LOOP
@ -400,53 +401,53 @@ ELSE2800: ; preds = %ELSE2797
br i1 undef, label %ENDIF2795, label %ELSE2803
ELSE2803: ; preds = %ELSE2800
%264 = fsub float %20, undef
%265 = fsub float %21, undef
%266 = fsub float %22, undef
%267 = fmul float %264, undef
%268 = fmul float %265, undef
%269 = fmul float %266, 0.000000e+00
%270 = fsub float %23, undef
%271 = fsub float %24, undef
%272 = fsub float %25, undef
%273 = fmul float %270, undef
%274 = fmul float %271, undef
%275 = fmul float %272, undef
%276 = call float @llvm.minnum.f32(float %267, float %273)
%277 = call float @llvm.maxnum.f32(float %268, float %274)
%278 = call float @llvm.maxnum.f32(float %269, float %275)
%279 = call float @llvm.maxnum.f32(float %276, float undef)
%280 = call float @llvm.maxnum.f32(float %279, float undef)
%281 = call float @llvm.minnum.f32(float undef, float %277)
%282 = call float @llvm.minnum.f32(float %281, float %278)
%283 = fcmp ogt float %280, 0.000000e+00
%284 = fcmp olt float %280, 1.000000e+00
%285 = and i1 %283, %284
%286 = fcmp olt float %280, %282
%287 = and i1 %285, %286
br i1 %287, label %ENDIF2795, label %ELSE2806
%tmp264 = fsub float %tmp20, undef
%tmp265 = fsub float %tmp21, undef
%tmp266 = fsub float %tmp22, undef
%tmp267 = fmul float %tmp264, undef
%tmp268 = fmul float %tmp265, undef
%tmp269 = fmul float %tmp266, 0.000000e+00
%tmp270 = fsub float %tmp23, undef
%tmp271 = fsub float %tmp24, undef
%tmp272 = fsub float %tmp25, undef
%tmp273 = fmul float %tmp270, undef
%tmp274 = fmul float %tmp271, undef
%tmp275 = fmul float %tmp272, undef
%tmp276 = call float @llvm.minnum.f32(float %tmp267, float %tmp273)
%tmp277 = call float @llvm.maxnum.f32(float %tmp268, float %tmp274)
%tmp278 = call float @llvm.maxnum.f32(float %tmp269, float %tmp275)
%tmp279 = call float @llvm.maxnum.f32(float %tmp276, float undef)
%tmp280 = call float @llvm.maxnum.f32(float %tmp279, float undef)
%tmp281 = call float @llvm.minnum.f32(float undef, float %tmp277)
%tmp282 = call float @llvm.minnum.f32(float %tmp281, float %tmp278)
%tmp283 = fcmp ogt float %tmp280, 0.000000e+00
%tmp284 = fcmp olt float %tmp280, 1.000000e+00
%tmp285 = and i1 %tmp283, %tmp284
%tmp286 = fcmp olt float %tmp280, %tmp282
%tmp287 = and i1 %tmp285, %tmp286
br i1 %tmp287, label %ENDIF2795, label %ELSE2806
ELSE2806: ; preds = %ELSE2803
%288 = fsub float %26, undef
%289 = fsub float %27, undef
%290 = fsub float %28, undef
%291 = fmul float %288, undef
%292 = fmul float %289, 0.000000e+00
%293 = fmul float %290, undef
%294 = fsub float %29, undef
%295 = fmul float %294, undef
%296 = call float @llvm.minnum.f32(float %291, float %295)
%297 = call float @llvm.minnum.f32(float %292, float undef)
%298 = call float @llvm.maxnum.f32(float %293, float undef)
%299 = call float @llvm.maxnum.f32(float %296, float %297)
%300 = call float @llvm.maxnum.f32(float %299, float undef)
%301 = call float @llvm.minnum.f32(float undef, float %298)
%302 = fcmp ogt float %300, 0.000000e+00
%303 = fcmp olt float %300, 1.000000e+00
%304 = and i1 %302, %303
%305 = fcmp olt float %300, %301
%306 = and i1 %304, %305
br i1 %306, label %ENDIF2795, label %ELSE2809
%tmp288 = fsub float %tmp26, undef
%tmp289 = fsub float %tmp27, undef
%tmp290 = fsub float %tmp28, undef
%tmp291 = fmul float %tmp288, undef
%tmp292 = fmul float %tmp289, 0.000000e+00
%tmp293 = fmul float %tmp290, undef
%tmp294 = fsub float %tmp29, undef
%tmp295 = fmul float %tmp294, undef
%tmp296 = call float @llvm.minnum.f32(float %tmp291, float %tmp295)
%tmp297 = call float @llvm.minnum.f32(float %tmp292, float undef)
%tmp298 = call float @llvm.maxnum.f32(float %tmp293, float undef)
%tmp299 = call float @llvm.maxnum.f32(float %tmp296, float %tmp297)
%tmp300 = call float @llvm.maxnum.f32(float %tmp299, float undef)
%tmp301 = call float @llvm.minnum.f32(float undef, float %tmp298)
%tmp302 = fcmp ogt float %tmp300, 0.000000e+00
%tmp303 = fcmp olt float %tmp300, 1.000000e+00
%tmp304 = and i1 %tmp302, %tmp303
%tmp305 = fcmp olt float %tmp300, %tmp301
%tmp306 = and i1 %tmp304, %tmp305
br i1 %tmp306, label %ENDIF2795, label %ELSE2809
ELSE2809: ; preds = %ELSE2806
br i1 undef, label %ENDIF2795, label %ELSE2812
@ -461,53 +462,42 @@ ELSE2818: ; preds = %ELSE2815
br i1 undef, label %ENDIF2795, label %ELSE2821
ELSE2821: ; preds = %ELSE2818
%307 = fsub float %56, undef
%308 = fsub float %57, undef
%309 = fsub float %58, undef
%310 = fmul float %307, undef
%311 = fmul float %308, 0.000000e+00
%312 = fmul float %309, undef
%313 = fsub float %59, undef
%314 = fsub float %60, undef
%315 = fsub float %61, undef
%316 = fmul float %313, undef
%317 = fmul float %314, undef
%318 = fmul float %315, undef
%319 = call float @llvm.maxnum.f32(float %310, float %316)
%320 = call float @llvm.maxnum.f32(float %311, float %317)
%321 = call float @llvm.maxnum.f32(float %312, float %318)
%322 = call float @llvm.minnum.f32(float %319, float %320)
%323 = call float @llvm.minnum.f32(float %322, float %321)
%324 = fcmp ogt float undef, 0.000000e+00
%325 = fcmp olt float undef, 1.000000e+00
%326 = and i1 %324, %325
%327 = fcmp olt float undef, %323
%328 = and i1 %326, %327
br i1 %328, label %ENDIF2795, label %ELSE2824
%tmp307 = fsub float %tmp56, undef
%tmp308 = fsub float %tmp57, undef
%tmp309 = fsub float %tmp58, undef
%tmp310 = fmul float %tmp307, undef
%tmp311 = fmul float %tmp308, 0.000000e+00
%tmp312 = fmul float %tmp309, undef
%tmp313 = fsub float %tmp59, undef
%tmp314 = fsub float %tmp60, undef
%tmp315 = fsub float %tmp61, undef
%tmp316 = fmul float %tmp313, undef
%tmp317 = fmul float %tmp314, undef
%tmp318 = fmul float %tmp315, undef
%tmp319 = call float @llvm.maxnum.f32(float %tmp310, float %tmp316)
%tmp320 = call float @llvm.maxnum.f32(float %tmp311, float %tmp317)
%tmp321 = call float @llvm.maxnum.f32(float %tmp312, float %tmp318)
%tmp322 = call float @llvm.minnum.f32(float %tmp319, float %tmp320)
%tmp323 = call float @llvm.minnum.f32(float %tmp322, float %tmp321)
%tmp324 = fcmp ogt float undef, 0.000000e+00
%tmp325 = fcmp olt float undef, 1.000000e+00
%tmp326 = and i1 %tmp324, %tmp325
%tmp327 = fcmp olt float undef, %tmp323
%tmp328 = and i1 %tmp326, %tmp327
br i1 %tmp328, label %ENDIF2795, label %ELSE2824
ELSE2824: ; preds = %ELSE2821
%.2849 = select i1 undef, float 0.000000e+00, float 1.000000e+00
br label %ENDIF2795
}
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.floor.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.minnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }

View File

@ -1,16 +1,16 @@
; RUN: llc < %s -march=amdgcn -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=SIVI %s
; RUN: llc < %s -march=amdgcn -mcpu=bonaire -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=CI --check-prefix=GCN %s
; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=SIVI %s
; RUN: llc -march=amdgcn -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SIVI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=SIVI %s
; SMRD load with an immediate offset.
; GCN-LABEL: {{^}}smrd0:
; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4
; Loads the i32 at element index 1 of the addrspace(2) pointer %ptr and stores
; it to the addrspace(1) pointer %out. Index 1 of an i32 array is byte offset 4,
; which matches the 0x4 immediate expected on VI; the SICI pattern expects 0x1.
; NOTE(review): no run line visible in this file enables a SICI check prefix
; (only SI, CI, VI, GCN and SIVI appear), so that pattern may never be checked
; -- confirm.
define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
entry:
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 1
%1 = load i32, i32 addrspace(2)* %0
store i32 %1, i32 addrspace(1)* %out
%tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 1
%tmp1 = load i32, i32 addrspace(2)* %tmp
store i32 %tmp1, i32 addrspace(1)* %out
ret void
}
@ -18,11 +18,11 @@ entry:
; GCN-LABEL: {{^}}smrd1:
; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff,0x{{[0-9]+[137]}}
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
; Loads the i32 at element index 255 of the addrspace(2) pointer %ptr and
; stores it to %out. Index 255 is byte offset 1020 (0x3fc), the immediate the
; VI pattern expects; the SICI pattern expects 0xff.
define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
entry:
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 255
%1 = load i32, i32 addrspace(2)* %0
store i32 %1, i32 addrspace(1)* %out
%tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 255
%tmp1 = load i32, i32 addrspace(2)* %tmp
store i32 %tmp1, i32 addrspace(1)* %out
ret void
}
@ -33,11 +33,11 @@ entry:
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
; GCN: s_endpgm
; Loads the i32 at element index 256 of the addrspace(2) pointer %ptr and
; stores it to %out. Index 256 is byte offset 1024 (0x400), the immediate the
; VI pattern expects; the CI pattern expects 0x100.
define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
entry:
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 256
%1 = load i32, i32 addrspace(2)* %0
store i32 %1, i32 addrspace(1)* %out
%tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 256
%tmp1 = load i32, i32 addrspace(2)* %tmp
store i32 %tmp1, i32 addrspace(1)* %out
ret void
}
@ -48,11 +48,11 @@ entry:
; SI: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0xb ; encoding: [0x0b
; TODO: Add VI checks
; GCN: s_endpgm
; Loads the i32 at element index 4294967296 (2^32) of the addrspace(2) pointer
; %ptr and stores it to %out, so the element index does not fit in 32 bits.
; Only an SI pattern and the s_endpgm check are present for this function;
; VI checks are still listed as a TODO above.
define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
entry:
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32
%1 = load i32, i32 addrspace(2)* %0
store i32 %1, i32 addrspace(1)* %out
%tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 4294967296
%tmp1 = load i32, i32 addrspace(2)* %tmp
store i32 %tmp1, i32 addrspace(1)* %out
ret void
}
@ -62,11 +62,11 @@ entry:
; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
; Loads the i32 at element index 262143 (0x3ffff) of the addrspace(2) pointer
; %ptr and stores it to %out. That is byte offset 1048572 (0xffffc), the
; immediate the VI pattern expects; the CI pattern expects 0x3ffff, while the
; SI pattern expects the offset to come from a captured [[OFFSET]] operand.
define void @smrd4(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
define void @smrd4(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
entry:
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262143
%1 = load i32, i32 addrspace(2)* %0
store i32 %1, i32 addrspace(1)* %out
%tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 262143
%tmp1 = load i32, i32 addrspace(2)* %tmp
store i32 %tmp1, i32 addrspace(1)* %out
ret void
}
@ -76,11 +76,11 @@ entry:
; SIVI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
; GCN: s_endpgm
; Loads the i32 at element index 262144 (0x40000) of the addrspace(2) pointer
; %ptr and stores it to %out. The CI pattern expects the immediate 0x40000;
; the SIVI pattern expects the offset to come from a captured [[OFFSET]]
; operand instead of an immediate.
define void @smrd5(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
define void @smrd5(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
entry:
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262144
%1 = load i32, i32 addrspace(2)* %0
store i32 %1, i32 addrspace(1)* %out
%tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 262144
%tmp1 = load i32, i32 addrspace(2)* %tmp
store i32 %tmp1, i32 addrspace(1)* %out
ret void
}
@ -88,12 +88,12 @@ entry:
; GCN-LABEL: {{^}}smrd_load_const0:
; SICI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x10
; Pixel-shader (amdgpu_ps) entry point: loads a <16 x i8> value through the
; first inreg pointer argument, passes it to llvm.SI.load.const with the
; constant offset 16, and exports the resulting float in all four channels.
; Offset 16 is 0x10, the immediate the VI pattern expects; the SICI pattern
; expects 0x4.
define amdgpu_ps void @smrd_load_const0(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) {
define amdgpu_ps void @smrd_load_const0(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
%20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
%21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
%22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp
%tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 16)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
ret void
}
@ -102,14 +102,15 @@ main_body:
; GCN-LABEL: {{^}}smrd_load_const1:
; SICI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
; amdgpu_ps entry point: same shape as the other smrd_load_const tests, but the
; llvm.SI.load.const offset operand is 1020. The loaded float is exported in
; all four data operands.
; NOTE(review): old (unnamed values, llvm.SI.export) and new (%arg*/%tmp*
; values, llvm.amdgcn.exp.f32) diff lines appear back to back here because the
; +/- markers were lost - confirm before treating it as valid IR.
define amdgpu_ps void @smrd_load_const1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) {
define amdgpu_ps void @smrd_load_const1(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
%20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
%21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
%22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 1020)
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp
%tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 1020)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
ret void
}
; SMRD load using the load.const intrinsic with an offset greater than the
; largest possible immediate.
; immediate offset.
@ -118,12 +119,12 @@ main_body:
; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
; amdgpu_ps entry point: loads a <16 x i8> value through the first argument and
; reads a float from it with llvm.SI.load.const using offset operand 1024, then
; exports that float in all four data operands.
; NOTE(review): old (unnamed values, llvm.SI.export) and new (%arg*/%tmp*
; values, llvm.amdgcn.exp.f32) diff lines appear back to back here because the
; +/- markers were lost - confirm before treating it as valid IR.
define amdgpu_ps void @smrd_load_const2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) {
define amdgpu_ps void @smrd_load_const2(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
%20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
%21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
%22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 1024)
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp
%tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 1024)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
ret void
}
@ -133,12 +134,12 @@ main_body:
; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
; amdgpu_ps entry point: loads a <16 x i8> value through the first argument and
; reads a float from it with llvm.SI.load.const using offset operand 1048572,
; then exports that float in all four data operands.
; NOTE(review): old (unnamed values, llvm.SI.export) and new (%arg*/%tmp*
; values, llvm.amdgcn.exp.f32) diff lines appear back to back here because the
; +/- markers were lost - confirm before treating it as valid IR.
define amdgpu_ps void @smrd_load_const3(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) {
define amdgpu_ps void @smrd_load_const3(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
%20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
%21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
%22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 1048572)
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp
%tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 1048572)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
ret void
}
@ -148,18 +149,17 @@ main_body:
; SIVI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
; GCN: s_endpgm
; amdgpu_ps entry point: loads a <16 x i8> value through the first argument and
; reads a float from it with llvm.SI.load.const using offset operand 1048576,
; then exports that float in all four data operands.
; NOTE(review): old (unnamed values, llvm.SI.export) and new (%arg*/%tmp*
; values, llvm.amdgcn.exp.f32) diff lines appear back to back here because the
; +/- markers were lost - confirm before treating it as valid IR.
define amdgpu_ps void @smrd_load_const4(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) {
define amdgpu_ps void @smrd_load_const4(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
%20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
%21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
%22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 1048576)
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp
%tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 1048576)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #0
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { nounwind readnone }
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }

View File

@ -107,7 +107,7 @@ endif: ; preds = %else, %if
%export = phi float [ %lds_data, %if ], [ %interp, %else ]
%tmp4 = call i32 @llvm.SI.packf16(float %export, float %export)
%tmp5 = bitcast i32 %tmp4 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp5, float %tmp5, float %tmp5, float %tmp5)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp5, float %tmp5, float %tmp5, float %tmp5, i1 true, i1 true) #0
ret void
}
@ -205,11 +205,9 @@ ret:
ret void
}
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #0
declare i32 @llvm.SI.packf16(float, float) readnone
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
declare i32 @llvm.SI.packf16(float, float) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }

View File

@ -1,11 +1,11 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; FIXME: Move this to sgpr-copy.ll when this is fixed on VI.
; Make sure that when we split an smrd instruction in order to move it to
; the VALU, we are also moving its users to the VALU.
; CHECK-LABEL: {{^}}split_smrd_add_worklist:
; CHECK: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
; GCN-LABEL: {{^}}split_smrd_add_worklist:
; GCN: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
define amdgpu_ps void @split_smrd_add_worklist([34 x <8 x i32>] addrspace(2)* byval %arg) #0 {
bb:
%tmp = call float @llvm.SI.load.const(<16 x i8> undef, i32 96)
@ -24,24 +24,20 @@ bb3: ; preds = %bb
%tmp9 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 1061158912, i32 1048576000>, <8 x i32> %tmp8, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp10 = extractelement <4 x float> %tmp9, i32 0
%tmp12 = call i32 @llvm.SI.packf16(float %tmp10, float undef)
%tmp13 = bitcast i32 %tmp12 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float undef, float %tmp13, float undef, float undef)
%tmp13 = bitcast i32 %tmp12 to <2 x half>
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp13, <2 x half> undef, i1 true, i1 true) #0
ret void
}
; Function Attrs: nounwind readnone
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
declare i32 @llvm.SI.packf16(float, float) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
!0 = !{!1, !1, i64 0, i32 1}
!1 = !{!"const", !3}
!2 = !{!1, !1, i64 0}
!3 = !{!"tbaa root"}
!1 = !{!"const", !2}
!2 = !{!"tbaa root"}

View File

@ -1,39 +1,37 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -o - %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -o - %s
; RUN: llc -march=amdgcn -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
; SI-LABEL:{{^}}row_filter_C1_D0:
; SI: s_endpgm
; Function Attrs: nounwind
; GCN-LABEL:{{^}}row_filter_C1_D0:
define void @row_filter_C1_D0() {
entry:
br i1 undef, label %for.inc.1, label %do.body.preheader
do.body.preheader: ; preds = %entry
%0 = insertelement <4 x i32> zeroinitializer, i32 undef, i32 1
%tmp = insertelement <4 x i32> zeroinitializer, i32 undef, i32 1
br i1 undef, label %do.body56.1, label %do.body90
do.body90: ; preds = %do.body56.2, %do.body56.1, %do.body.preheader
%1 = phi <4 x i32> [ %6, %do.body56.2 ], [ %5, %do.body56.1 ], [ %0, %do.body.preheader ]
%2 = insertelement <4 x i32> %1, i32 undef, i32 2
%3 = insertelement <4 x i32> %2, i32 undef, i32 3
%tmp1 = phi <4 x i32> [ %tmp6, %do.body56.2 ], [ %tmp5, %do.body56.1 ], [ %tmp, %do.body.preheader ]
%tmp2 = insertelement <4 x i32> %tmp1, i32 undef, i32 2
%tmp3 = insertelement <4 x i32> %tmp2, i32 undef, i32 3
br i1 undef, label %do.body124.1, label %do.body.1562.preheader
do.body.1562.preheader: ; preds = %do.body124.1, %do.body90
%storemerge = phi <4 x i32> [ %3, %do.body90 ], [ %7, %do.body124.1 ]
%4 = insertelement <4 x i32> undef, i32 undef, i32 1
%storemerge = phi <4 x i32> [ %tmp3, %do.body90 ], [ %tmp7, %do.body124.1 ]
%tmp4 = insertelement <4 x i32> undef, i32 undef, i32 1
br label %for.inc.1
do.body56.1: ; preds = %do.body.preheader
%5 = insertelement <4 x i32> %0, i32 undef, i32 1
%tmp5 = insertelement <4 x i32> %tmp, i32 undef, i32 1
%or.cond472.1 = or i1 undef, undef
br i1 %or.cond472.1, label %do.body56.2, label %do.body90
do.body56.2: ; preds = %do.body56.1
%6 = insertelement <4 x i32> %5, i32 undef, i32 1
%tmp6 = insertelement <4 x i32> %tmp5, i32 undef, i32 1
br label %do.body90
do.body124.1: ; preds = %do.body90
%7 = insertelement <4 x i32> %3, i32 undef, i32 3
%tmp7 = insertelement <4 x i32> %tmp3, i32 undef, i32 3
br label %do.body.1562.preheader
for.inc.1: ; preds = %do.body.1562.preheader, %entry
@ -42,8 +40,8 @@ for.inc.1: ; preds = %do.body.1562.prehea
unreachable
}
; SI-LABEL: {{^}}foo:
; SI: s_endpgm
; GCN-LABEL: {{^}}foo:
; GCN: s_endpgm
define amdgpu_ps void @foo() #0 {
bb:
br i1 undef, label %bb2, label %bb1
@ -78,9 +76,9 @@ bb13: ; preds = %bb2
bb14: ; preds = %bb27, %bb24, %bb9
%tmp15 = phi float [ %tmp12, %bb9 ], [ undef, %bb27 ], [ 0.000000e+00, %bb24 ]
%tmp16 = phi float [ %tmp11, %bb9 ], [ undef, %bb27 ], [ %tmp25, %bb24 ]
%tmp17 = fmul float 10.5, %tmp16
%tmp18 = fmul float 11.5, %tmp15
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp18, float %tmp17, float %tmp17, float %tmp17)
%tmp17 = fmul float 1.050000e+01, %tmp16
%tmp18 = fmul float 1.150000e+01, %tmp15
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp18, float %tmp17, float %tmp17, float %tmp17, i1 true, i1 true) #0
ret void
bb23: ; preds = %bb13
@ -97,13 +95,8 @@ bb27: ; preds = %bb24
br label %bb14
}
; Function Attrs: nounwind readnone
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }

View File

@ -5,17 +5,19 @@
; FUNC-LABEL: {{^}}udiv_i32:
; EG-NOT: SETGE_INT
; EG: CF_END
; SI: v_rcp_iflag_f32_e32
; Loads two adjacent i32 values through %in (elements 0 and 1), computes the
; unsigned quotient of the first by the second, and stores it through %out.
; NOTE(review): the two pairs of load lines differ only in the spacing before
; '*'; they look like the pre- and post-change lines of a diff whose +/-
; markers were lost - confirm before treating it as valid IR.
define void @udiv_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
%a = load i32, i32 addrspace(1) * %in
%b = load i32, i32 addrspace(1) * %b_ptr
%a = load i32, i32 addrspace(1)* %in
%b = load i32, i32 addrspace(1)* %b_ptr
%result = udiv i32 %a, %b
store i32 %result, i32 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}s_udiv_i32:
; SI: v_rcp_iflag_f32_e32
define void @s_udiv_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
%result = udiv i32 %a, %b
store i32 %result, i32 addrspace(1)* %out
@ -30,6 +32,8 @@ define void @s_udiv_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
; FUNC-LABEL: {{^}}udiv_v2i32:
; EG: CF_END
; SI: v_rcp_iflag_f32_e32
; SI: v_rcp_iflag_f32_e32
; SI: s_endpgm
define void @udiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
@ -158,3 +162,21 @@ define void @scalarize_mulhu_4xi32(<4 x i32> addrspace(1)* nocapture readonly %i
store <4 x i32> %2, <4 x i32> addrspace(1)* %out, align 16
ret void
}
; FUNC-LABEL: {{^}}test_udiv2:
; SI: s_lshr_b32 s{{[0-9]}}, s{{[0-9]}}, 1
define void @test_udiv2(i32 %p) {
  ; Unsigned divide of the argument by the constant 2.
  %half = udiv i32 %p, 2
  ; Volatile store of the quotient through an undef addrspace(1) pointer
  ; (presumably so the division stays live in the generated code).
  store volatile i32 %half, i32 addrspace(1)* undef
  ret void
}
; FUNC-LABEL: {{^}}test_udiv_3_mulhu:
; SI: v_mov_b32_e32 v{{[0-9]+}}, 0xaaaaaaab
; SI: v_mul_hi_u32 v0, {{v[0-9]+}}, {{s[0-9]+}}
; SI-NEXT: v_lshrrev_b32_e32 v0, 1, v0
define void @test_udiv_3_mulhu(i32 %p) {
  ; Unsigned divide of the argument by the constant 3.
  %third = udiv i32 %p, 3
  ; Volatile store of the quotient through an undef addrspace(1) pointer
  ; (presumably so the division stays live in the generated code).
  store volatile i32 %third, i32 addrspace(1)* undef
  ret void
}

View File

@ -1,13 +0,0 @@
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
; CHECK: v_rcp_iflag_f32_e32
define void @test(i32 %p, i32 %q) {
  ; Unsigned divide with both operands taken from the arguments.
  %quot = udiv i32 %p, %q
  ; Reinterpret the integer quotient as a float so it can be passed to the
  ; export intrinsic, which takes float data operands.
  %bits = bitcast i32 %quot to float
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %bits, float %bits, float %bits, float %bits)
  ret void
}
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

View File

@ -179,39 +179,39 @@ bb24: ; preds = %bb157, %bb
br i1 %tmp155, label %bb156, label %bb157
bb156: ; preds = %bb24
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %tmp12, float %tmp103, float %tmp102, float %tmp101)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %tmp99, float %tmp98, float %tmp97, float %tmp95)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %tmp94, float %tmp93, float %tmp91, float %tmp90)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %tmp89, float %tmp87, float %tmp86, float %tmp85)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %tmp83, float %tmp82, float %tmp81, float %tmp79)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %tmp78, float %tmp77, float %tmp75, float %tmp74)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %tmp73, float %tmp71, float %tmp70, float %tmp69)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 39, i32 0, float %tmp67, float %tmp66, float %tmp65, float %tmp63)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 40, i32 0, float %tmp62, float %tmp61, float %tmp59, float %tmp58)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 41, i32 0, float %tmp57, float %tmp55, float %tmp54, float %tmp53)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 42, i32 0, float %tmp51, float %tmp50, float %tmp49, float %tmp47)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 43, i32 0, float %tmp46, float %tmp45, float %tmp43, float %tmp42)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 44, i32 0, float %tmp41, float %tmp39, float %tmp38, float %tmp37)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 45, i32 0, float %tmp35, float %tmp34, float %tmp33, float %tmp31)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 46, i32 0, float %tmp30, float %tmp29, float %tmp27, float %tmp26)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 47, i32 0, float %tmp25, float %tmp28, float %tmp32, float %tmp36)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 48, i32 0, float %tmp40, float %tmp44, float %tmp48, float %tmp52)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 49, i32 0, float %tmp56, float %tmp60, float %tmp64, float %tmp68)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 50, i32 0, float %tmp72, float %tmp76, float %tmp80, float %tmp84)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 51, i32 0, float %tmp88, float %tmp92, float %tmp96, float %tmp100)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 52, i32 0, float %tmp104, float %tmp105, float %tmp106, float %tmp108)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 53, i32 0, float %tmp109, float %tmp110, float %tmp111, float %tmp112)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 54, i32 0, float %tmp113, float %tmp114, float %tmp115, float %tmp116)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 55, i32 0, float %tmp117, float %tmp118, float %tmp119, float %tmp120)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 56, i32 0, float %tmp121, float %tmp122, float %tmp123, float %tmp124)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 57, i32 0, float %tmp125, float %tmp126, float %tmp127, float %tmp128)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 58, i32 0, float %tmp129, float %tmp130, float %tmp131, float %tmp132)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 59, i32 0, float %tmp133, float %tmp134, float %tmp135, float %tmp136)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 60, i32 0, float %tmp137, float %tmp138, float %tmp139, float %tmp140)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 61, i32 0, float %tmp141, float %tmp142, float %tmp143, float %tmp144)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 62, i32 0, float %tmp145, float %tmp146, float %tmp147, float %tmp148)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 63, i32 0, float %tmp149, float %tmp150, float %tmp151, float %tmp13)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %tmp19, float %tmp20, float %tmp21, float %tmp22)
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %tmp12, float %tmp103, float %tmp102, float %tmp101, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %tmp99, float %tmp98, float %tmp97, float %tmp95, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 34, i32 15, float %tmp94, float %tmp93, float %tmp91, float %tmp90, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 35, i32 15, float %tmp89, float %tmp87, float %tmp86, float %tmp85, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 36, i32 15, float %tmp83, float %tmp82, float %tmp81, float %tmp79, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 37, i32 15, float %tmp78, float %tmp77, float %tmp75, float %tmp74, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 38, i32 15, float %tmp73, float %tmp71, float %tmp70, float %tmp69, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 39, i32 15, float %tmp67, float %tmp66, float %tmp65, float %tmp63, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 40, i32 15, float %tmp62, float %tmp61, float %tmp59, float %tmp58, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 41, i32 15, float %tmp57, float %tmp55, float %tmp54, float %tmp53, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 42, i32 15, float %tmp51, float %tmp50, float %tmp49, float %tmp47, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 43, i32 15, float %tmp46, float %tmp45, float %tmp43, float %tmp42, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 44, i32 15, float %tmp41, float %tmp39, float %tmp38, float %tmp37, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 45, i32 15, float %tmp35, float %tmp34, float %tmp33, float %tmp31, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 46, i32 15, float %tmp30, float %tmp29, float %tmp27, float %tmp26, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 47, i32 15, float %tmp25, float %tmp28, float %tmp32, float %tmp36, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 48, i32 15, float %tmp40, float %tmp44, float %tmp48, float %tmp52, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 49, i32 15, float %tmp56, float %tmp60, float %tmp64, float %tmp68, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 50, i32 15, float %tmp72, float %tmp76, float %tmp80, float %tmp84, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 51, i32 15, float %tmp88, float %tmp92, float %tmp96, float %tmp100, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 52, i32 15, float %tmp104, float %tmp105, float %tmp106, float %tmp108, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 53, i32 15, float %tmp109, float %tmp110, float %tmp111, float %tmp112, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 54, i32 15, float %tmp113, float %tmp114, float %tmp115, float %tmp116, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 55, i32 15, float %tmp117, float %tmp118, float %tmp119, float %tmp120, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 56, i32 15, float %tmp121, float %tmp122, float %tmp123, float %tmp124, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 57, i32 15, float %tmp125, float %tmp126, float %tmp127, float %tmp128, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 58, i32 15, float %tmp129, float %tmp130, float %tmp131, float %tmp132, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 59, i32 15, float %tmp133, float %tmp134, float %tmp135, float %tmp136, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 60, i32 15, float %tmp137, float %tmp138, float %tmp139, float %tmp140, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 61, i32 15, float %tmp141, float %tmp142, float %tmp143, float %tmp144, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 62, i32 15, float %tmp145, float %tmp146, float %tmp147, float %tmp148, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 63, i32 15, float %tmp149, float %tmp150, float %tmp151, float %tmp13, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %tmp19, float %tmp20, float %tmp21, float %tmp22, i1 true, i1 false) #0
ret void
bb157: ; preds = %bb24
@ -482,15 +482,11 @@ bb157: ; preds = %bb24
br label %bb24
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }

View File

@ -11,7 +11,7 @@
; DEFAULT: exp
; DEFAULT: s_waitcnt lgkmcnt(0)
; DEFAULT: s_endpgm
define amdgpu_vs void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, <16 x i8> addrspace(2)* inreg %arg3, <16 x i8> addrspace(2)* inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, float addrspace(2)* inreg %constptr) {
define amdgpu_vs void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, <16 x i8> addrspace(2)* inreg %arg3, <16 x i8> addrspace(2)* inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, float addrspace(2)* inreg %constptr) #0 {
main_body:
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg3, i32 0
%tmp10 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
@ -20,8 +20,7 @@ main_body:
%tmp13 = extractelement <4 x float> %tmp11, i32 1
call void @llvm.amdgcn.s.barrier() #1
%tmp14 = extractelement <4 x float> %tmp11, i32 2
; %tmp15 = extractelement <4 x float> %tmp11, i32 3
%tmp15 = load float, float addrspace(2)* %constptr, align 4 ; Force waiting for expcnt and lgkmcnt
%tmp15 = load float, float addrspace(2)* %constptr, align 4
%tmp16 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg3, i32 1
%tmp17 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp16, !tbaa !0
%tmp18 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %tmp17, i32 0, i32 %arg6)
@ -29,8 +28,8 @@ main_body:
%tmp20 = extractelement <4 x float> %tmp18, i32 1
%tmp21 = extractelement <4 x float> %tmp18, i32 2
%tmp22 = extractelement <4 x float> %tmp18, i32 3
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %tmp19, float %tmp20, float %tmp21, float %tmp22)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %tmp12, float %tmp13, float %tmp14, float %tmp15)
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %tmp19, float %tmp20, float %tmp21, float %tmp22, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %tmp12, float %tmp13, float %tmp14, float %tmp15, i1 true, i1 false) #0
ret void
}
@ -44,40 +43,34 @@ main_body:
; ILPMAX: s_waitcnt vmcnt(1)
; ILPMAX: s_waitcnt vmcnt(0)
; ILPMAX: s_endpgm
; amdgpu_vs entry point: loads elements 0 and 1 of the [16 x <16 x i8>] array
; argument, passes each to llvm.SI.vs.load.input with an index formed by adding
; the sixth and eighth arguments, splits both <4 x float> results into scalars,
; and exports them (first export operands 12 and 32 in the exp.f32 form).
; NOTE(review): old (unnamed values, llvm.SI.export) and new (%arg*/%tmp*
; values, llvm.amdgcn.exp.f32) diff lines appear back to back here because the
; +/- markers were lost - confirm before treating it as valid IR.
define amdgpu_vs void @main2([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)*
byval, i32 inreg, i32 inreg, i32, i32, i32, i32) {
define amdgpu_vs void @main2([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, [16 x <16 x i8>] addrspace(2)* byval %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {
main_body:
; Variant using unnamed values (%0..%26) and llvm.SI.export.
%11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = add i32 %5, %7
%14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13)
%15 = extractelement <4 x float> %14, i32 0
%16 = extractelement <4 x float> %14, i32 1
%17 = extractelement <4 x float> %14, i32 2
%18 = extractelement <4 x float> %14, i32 3
%19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0
%21 = add i32 %5, %7
%22 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %21)
%23 = extractelement <4 x float> %22, i32 0
%24 = extractelement <4 x float> %22, i32 1
%25 = extractelement <4 x float> %22, i32 2
%26 = extractelement <4 x float> %22, i32 3
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %15, float %16, float %17, float %18)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %23, float %24, float %25, float %26)
; Variant using named values (%arg*, %tmp*) and llvm.amdgcn.exp.f32.
%tmp = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %arg4, i64 0, i64 0
%tmp11 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, align 16, !tbaa !0
%tmp12 = add i32 %arg5, %arg7
%tmp13 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %tmp11, i32 0, i32 %tmp12)
%tmp14 = extractelement <4 x float> %tmp13, i32 0
%tmp15 = extractelement <4 x float> %tmp13, i32 1
%tmp16 = extractelement <4 x float> %tmp13, i32 2
%tmp17 = extractelement <4 x float> %tmp13, i32 3
%tmp18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %arg4, i64 0, i64 1
%tmp19 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp18, align 16, !tbaa !0
%tmp20 = add i32 %arg5, %arg7
%tmp21 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %tmp19, i32 0, i32 %tmp20)
%tmp22 = extractelement <4 x float> %tmp21, i32 0
%tmp23 = extractelement <4 x float> %tmp21, i32 1
%tmp24 = extractelement <4 x float> %tmp21, i32 2
%tmp25 = extractelement <4 x float> %tmp21, i32 3
call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %tmp14, float %tmp15, float %tmp16, float %tmp17, i1 true, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %tmp22, float %tmp23, float %tmp24, float %tmp25, i1 false, i1 false) #0
ret void
}
; Function Attrs: convergent nounwind
declare void @llvm.amdgcn.s.barrier() #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #2
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { nounwind }
attributes #1 = { convergent nounwind }
attributes #2 = { nounwind readnone }

View File

@ -1,5 +1,5 @@
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=SI
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=VI
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK -check-prefix=VI %s
; Check that WQM isn't triggered by image load/store intrinsics.
;
@ -25,9 +25,7 @@ main_body:
%c.3 = extractelement <4 x i32> %c.2, i32 0
%gep = getelementptr float, float addrspace(1)* %ptr, i32 %c.3
%data = load float, float addrspace(1)* %gep
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %data, float undef, float undef, float undef)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %data, float undef, float undef, float undef, i1 true, i1 true) #1
ret void
}
@ -500,7 +498,7 @@ end:
ret <4 x float> %r
}
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
declare void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #1
declare void @llvm.amdgcn.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1
@ -512,8 +510,7 @@ declare <4 x float> @llvm.SI.image.sample.i32(i32, <8 x i32>, <4 x i32>, i32, i3
; Image-sample intrinsics; attribute group #3 is referenced here but its
; definition is not part of the visible hunk.
declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #3
declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #3
; llvm.AMDGPU.kill is listed twice below: once without attributes and once with #1.
; NOTE(review): this looks like both sides of a rendered diff (the attribute-less
; kill and the llvm.SI.export declaration being the old side) - confirm against the raw patch.
declare void @llvm.AMDGPU.kill(float)
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
declare void @llvm.AMDGPU.kill(float) #1
; Attribute groups defined in this hunk.
attributes #1 = { nounwind }
attributes #2 = { nounwind readonly }

View File

@ -6,46 +6,51 @@
target triple = "amdgcn--"
; Intrinsic declarations placed ahead of @wrapper. Their #0/#1/#2 references match the
; first attribute list near the end of this file's diff
; (#0 = nounwind readnone, #1 = nounwind readonly, #2 = nounwind).
; NOTE(review): the same intrinsics are declared again after @wrapper with a different
; numbering; these three lines are presumably the removed side - confirm against the raw patch.
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0
declare <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #2
; @wrapper: amdgpu_vs test function taking one inreg i32 and one plain i32, returning void.
; NOTE(review): this span is a rendered diff without +/- markers, so every instruction
; appears twice: once with numbered values (%0..%16) and once with named values
; (%arg, %arg1, %tmp..%tmp15). The two `define` lines are the two spellings of the
; same signature.
define amdgpu_vs void @wrapper(i32 inreg, i32) {
define amdgpu_vs void @wrapper(i32 inreg %arg, i32 %arg1) {
main_body:
; Add the two arguments, pass the sum as the last operand of the vertex-input load,
; take element 1 of the loaded vector, convert it to i32 and insert it into
; element 1 of a <2 x i32> (element 0 stays undef).
%2 = add i32 %1, %0
%3 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> undef, i32 0, i32 %2)
%4 = extractelement <4 x float> %3, i32 1
%5 = fptosi float %4 to i32
%6 = insertelement <2 x i32> undef, i32 %5, i32 1
%tmp = add i32 %arg1, %arg
%tmp2 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> undef, i32 0, i32 %tmp)
%tmp3 = extractelement <4 x float> %tmp2, i32 1
%tmp4 = fptosi float %tmp3 to i32
%tmp5 = insertelement <2 x i32> undef, i32 %tmp4, i32 1
br label %loop11.i
loop11.i: ; preds = %endif46.i, %main_body
; Loop header: the counter is 0 on entry from main_body and takes the incremented
; value when coming from endif46.i; branch to main.exit once it is signed-greater than 999.
%7 = phi i32 [ 0, %main_body ], [ %15, %endif46.i ]
%8 = icmp sgt i32 %7, 999
br i1 %8, label %main.exit, label %if16.i
%tmp6 = phi i32 [ 0, %main_body ], [ %tmp14, %endif46.i ]
%tmp7 = icmp sgt i32 %tmp6, 999
br i1 %tmp7, label %main.exit, label %if16.i
if16.i: ; preds = %loop11.i
; Image load addressed by the <2 x i32> built in main_body; take element 0 and branch to
; if28.i when `fcmp ult 0.0, element0` holds (unordered or less-than), else to endif46.i.
%9 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %6, <8 x i32> undef, i32 15, i1 true, i1 false, i1 false, i1 false)
%10 = extractelement <4 x float> %9, i32 0
%11 = fcmp ult float 0.000000e+00, %10
br i1 %11, label %if28.i, label %endif46.i
%tmp8 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %tmp5, <8 x i32> undef, i32 15, i1 true, i1 false, i1 false, i1 false)
%tmp9 = extractelement <4 x float> %tmp8, i32 0
%tmp10 = fcmp ult float 0.000000e+00, %tmp9
br i1 %tmp10, label %if28.i, label %endif46.i
if28.i: ; preds = %if16.i
; Reinterpret the loaded float as i32, shift it left by 16 bits, reinterpret the
; result as float again, then leave the loop.
%12 = bitcast float %10 to i32
%13 = shl i32 %12, 16
%14 = bitcast i32 %13 to float
%tmp11 = bitcast float %tmp9 to i32
%tmp12 = shl i32 %tmp11, 16
%tmp13 = bitcast i32 %tmp12 to float
br label %main.exit
endif46.i: ; preds = %if16.i
; Increment the loop counter and go back to the loop header.
%15 = add i32 %7, 1
%tmp14 = add i32 %tmp6, 1
br label %loop11.i
main.exit: ; preds = %if28.i, %loop11.i
; Pick the shifted value when arriving from if28.i, or the constant 0x36F0800000000000
; when arriving from the loop header, and export it as the first data operand.
; The first call uses llvm.SI.export, the second llvm.amdgcn.exp.f32.
; NOTE(review): the operand mapping between the two forms (15 / 32 / trailing flags)
; is not explained in this file - confirm against the intrinsic definitions.
%16 = phi float [ %14, %if28.i ], [ 0x36F0800000000000, %loop11.i ]
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %16, float 0.000000e+00, float 0.000000e+00, float 0x36A0000000000000)
%tmp15 = phi float [ %tmp13, %if28.i ], [ 0x36F0800000000000, %loop11.i ]
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %tmp15, float 0.000000e+00, float 0.000000e+00, float 0x36A0000000000000, i1 false, i1 false) #0
ret void
}
; NOTE(review): two attribute numberings follow because this is a rendered diff.
; First list: matches the #0/#1/#2 references on the declarations that precede @wrapper.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind }
; Second list: the declarations below and the numbering they use
; (#0 = nounwind, #1 = nounwind readnone, #2 = nounwind readonly).
; Function Attrs: nounwind
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readonly
declare <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind readonly }