mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-19 11:02:59 +02:00
AMDGPU: Remove some uses of llvm.SI.export in tests
Merge some of the old, smaller tests into more complete versions. llvm-svn: 295792
This commit is contained in:
parent
65d8dccee7
commit
3320e649a3
@ -3,19 +3,15 @@
|
||||
|
||||
; This test just checks that the compiler doesn't crash.
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
|
||||
; FUNC-LABEL: {{^}}v32i8_to_v8i32:
|
||||
; SI: s_endpgm
|
||||
define amdgpu_ps void @v32i8_to_v8i32(<32 x i8> addrspace(2)* inreg) #0 {
|
||||
define amdgpu_ps float @v32i8_to_v8i32(<32 x i8> addrspace(2)* inreg) #0 {
|
||||
entry:
|
||||
%1 = load <32 x i8>, <32 x i8> addrspace(2)* %0
|
||||
%2 = bitcast <32 x i8> %1 to <8 x i32>
|
||||
%3 = extractelement <8 x i32> %2, i32 1
|
||||
%4 = icmp ne i32 %3, 0
|
||||
%5 = select i1 %4, float 0.0, float 1.0
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %5, float %5, float %5, float %5)
|
||||
ret void
|
||||
ret float %5
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}i8ptr_v16i8ptr:
|
||||
|
@ -4,7 +4,7 @@
|
||||
; GCN-LABEL: {{^}}main:
|
||||
; SI: v_lshl_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
|
||||
; VI: v_lshlrev_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, 1
|
||||
define amdgpu_ps void @main(float %arg0, float %arg1) #0 {
|
||||
define amdgpu_ps float @main(float %arg0, float %arg1) #0 {
|
||||
bb:
|
||||
%tmp = fptosi float %arg0 to i32
|
||||
%tmp1 = call <4 x float> @llvm.SI.image.load.v4i32(<4 x i32> undef, <8 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
|
||||
@ -17,13 +17,11 @@ bb:
|
||||
%tmp7 = select i1 %tmp6, float 0.000000e+00, float %arg1
|
||||
%tmp8 = call i32 @llvm.SI.packf16(float undef, float %tmp7)
|
||||
%tmp9 = bitcast i32 %tmp8 to float
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float undef, float %tmp9, float undef, float %tmp9)
|
||||
ret void
|
||||
ret float %tmp9
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.SI.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
|
||||
declare i32 @llvm.SI.packf16(float, float) #1
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
@ -97,18 +97,15 @@ main_body:
|
||||
|
||||
; GCN-LABEL: {{^}}kill_vcc_implicit_def:
|
||||
; GCN: IeeeMode: 0
|
||||
define amdgpu_ps void @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) {
|
||||
define amdgpu_ps float @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) {
|
||||
entry:
|
||||
%tmp0 = fcmp olt float %13, 0.0
|
||||
call void @llvm.AMDGPU.kill(float %14)
|
||||
%tmp1 = select i1 %tmp0, float 1.0, float 0.0
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 1, i32 1, float %tmp1, float %tmp1, float %tmp1, float %tmp1)
|
||||
ret void
|
||||
ret float %tmp1
|
||||
}
|
||||
|
||||
|
||||
declare void @llvm.AMDGPU.kill(float)
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
|
||||
attributes #0 = { nounwind "target-cpu"="tahiti" }
|
||||
attributes #1 = { nounwind "target-cpu"="fiji" }
|
||||
|
@ -24,11 +24,13 @@
|
||||
; TONGA-NEXT: .long 704
|
||||
; CONFIG: .p2align 8
|
||||
; CONFIG: test:
|
||||
define amdgpu_ps void @test(i32 %p) {
|
||||
define amdgpu_ps void @test(i32 %p) #0 {
|
||||
%i = add i32 %p, 2
|
||||
%r = bitcast i32 %i to float
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r, float %r, float %r, float %r, i1 true, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
@ -667,3 +667,18 @@ define void @store_literal_imm_f64(double addrspace(1)* %out) {
|
||||
store double 4096.0, double addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}literal_folding:
|
||||
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3f4353f8, v{{[0-9]+}}
|
||||
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0xbf4353f8, v{{[0-9]+}}
|
||||
define amdgpu_vs void @literal_folding(float %arg) {
|
||||
main_body:
|
||||
%tmp = fmul float %arg, 0x3FE86A7F00000000
|
||||
%tmp1 = fmul float %arg, 0xBFE86A7F00000000
|
||||
call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %tmp, float %tmp, float %tmp1, float %tmp1, i1 true, i1 false) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
@ -1,18 +1,18 @@
|
||||
# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-insert-waits -o - %s | FileCheck %s
|
||||
--- |
|
||||
define amdgpu_ps <4 x float> @exp_done_waitcnt(<4 x i32> inreg, <4 x i32> inreg, i32 inreg %w, float %v) {
|
||||
define amdgpu_ps <4 x float> @exp_done_waitcnt(<4 x i32> inreg, <4 x
|
||||
i32> inreg, i32 inreg %w, float %v) #0 {
|
||||
%a = load volatile float, float addrspace(1)* undef
|
||||
%b = load volatile float, float addrspace(1)* undef
|
||||
%c = load volatile float, float addrspace(1)* undef
|
||||
%d = load volatile float, float addrspace(1)* undef
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %a, float %b, float %c, float %d)
|
||||
call void @llvm.amdgcn.exp.f32(i32 15, i32 1, float %a, float %b, float %c, float %d, i1 true, i1 false)
|
||||
ret <4 x float> <float 5.000000e-01, float 1.000000e+00, float 2.000000e+00, float 4.000000e+00>
|
||||
}
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||
|
||||
attributes #0 = { readnone }
|
||||
attributes #1 = { nounwind }
|
||||
attributes #0 = { nounwind }
|
||||
|
||||
...
|
||||
---
|
||||
|
@ -4,15 +4,14 @@
|
||||
; SI-LABEL: {{^}}kill_gs_const:
|
||||
; SI-NOT: v_cmpx_le_f32
|
||||
; SI: s_mov_b64 exec, 0
|
||||
|
||||
define amdgpu_gs void @kill_gs_const() {
|
||||
main_body:
|
||||
%0 = icmp ule i32 0, 3
|
||||
%1 = select i1 %0, float 1.000000e+00, float -1.000000e+00
|
||||
call void @llvm.AMDGPU.kill(float %1)
|
||||
%2 = icmp ule i32 3, 0
|
||||
%3 = select i1 %2, float 1.000000e+00, float -1.000000e+00
|
||||
call void @llvm.AMDGPU.kill(float %3)
|
||||
%tmp = icmp ule i32 0, 3
|
||||
%tmp1 = select i1 %tmp, float 1.000000e+00, float -1.000000e+00
|
||||
call void @llvm.AMDGPU.kill(float %tmp1)
|
||||
%tmp2 = icmp ule i32 3, 0
|
||||
%tmp3 = select i1 %tmp2, float 1.000000e+00, float -1.000000e+00
|
||||
call void @llvm.AMDGPU.kill(float %tmp3)
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -21,16 +20,16 @@ main_body:
|
||||
; SI: v_cmp_gt_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], 0, v{{[0-9]+}}
|
||||
; SI: v_cmpx_le_f32_e32 vcc, 0, v{{[0-9]+}}
|
||||
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1.0, [[CMP]]
|
||||
define amdgpu_ps void @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) {
|
||||
define amdgpu_ps void @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) {
|
||||
entry:
|
||||
%tmp0 = fcmp olt float %13, 0.0
|
||||
call void @llvm.AMDGPU.kill(float %14)
|
||||
%tmp1 = select i1 %tmp0, float 1.0, float 0.0
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 1, i32 1, float %tmp1, float %tmp1, float %tmp1, float %tmp1)
|
||||
%tmp0 = fcmp olt float %arg13, 0.000000e+00
|
||||
call void @llvm.AMDGPU.kill(float %arg14)
|
||||
%tmp1 = select i1 %tmp0, float 1.000000e+00, float 0.000000e+00
|
||||
call void @llvm.amdgcn.exp.f32(i32 1, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.AMDGPU.kill(float)
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
declare void @llvm.AMDGPU.kill(float) #0
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||
|
||||
!0 = !{!"const", null, i32 1}
|
||||
attributes #0 = { nounwind }
|
||||
|
@ -1,146 +1,144 @@
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefixes=CHECK,VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
|
||||
|
||||
;CHECK-LABEL: {{^}}image_load_v4i32:
|
||||
;CHECK: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
define amdgpu_ps <4 x float> @image_load_v4i32(<8 x i32> inreg %rsrc, <4 x i32> %c) {
|
||||
; GCN-LABEL: {{^}}image_load_v4i32:
|
||||
; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm
|
||||
; GCN: s_waitcnt vmcnt(0)
|
||||
define amdgpu_ps <4 x float> @image_load_v4i32(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 {
|
||||
main_body:
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
|
||||
ret <4 x float> %tex
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}image_load_v2i32:
|
||||
;CHECK: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
define amdgpu_ps <4 x float> @image_load_v2i32(<8 x i32> inreg %rsrc, <2 x i32> %c) {
|
||||
; GCN-LABEL: {{^}}image_load_v2i32:
|
||||
; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm
|
||||
; GCN: s_waitcnt vmcnt(0)
|
||||
define amdgpu_ps <4 x float> @image_load_v2i32(<8 x i32> inreg %rsrc, <2 x i32> %c) #0 {
|
||||
main_body:
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
|
||||
ret <4 x float> %tex
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}image_load_i32:
|
||||
;CHECK: image_load v[0:3], v0, s[0:7] dmask:0xf unorm
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
define amdgpu_ps <4 x float> @image_load_i32(<8 x i32> inreg %rsrc, i32 %c) {
|
||||
; GCN-LABEL: {{^}}image_load_i32:
|
||||
; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm
|
||||
; GCN: s_waitcnt vmcnt(0)
|
||||
define amdgpu_ps <4 x float> @image_load_i32(<8 x i32> inreg %rsrc, i32 %c) #0 {
|
||||
main_body:
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
|
||||
ret <4 x float> %tex
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}image_load_mip:
|
||||
;CHECK: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
define amdgpu_ps <4 x float> @image_load_mip(<8 x i32> inreg %rsrc, <4 x i32> %c) {
|
||||
; GCN-LABEL: {{^}}image_load_mip:
|
||||
; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm
|
||||
; GCN: s_waitcnt vmcnt(0)
|
||||
define amdgpu_ps <4 x float> @image_load_mip(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 {
|
||||
main_body:
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.load.mip.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.load.mip.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
|
||||
ret <4 x float> %tex
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}image_load_1:
|
||||
;CHECK: image_load v0, v[0:3], s[0:7] dmask:0x1 unorm
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
define amdgpu_ps float @image_load_1(<8 x i32> inreg %rsrc, <4 x i32> %c) {
|
||||
; GCN-LABEL: {{^}}image_load_1:
|
||||
; GCN: image_load v0, v[0:3], s[0:7] dmask:0x1 unorm
|
||||
; GCN: s_waitcnt vmcnt(0)
|
||||
define amdgpu_ps float @image_load_1(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 {
|
||||
main_body:
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
|
||||
%tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
|
||||
%elt = extractelement <4 x float> %tex, i32 0
|
||||
; Only first component used, test that dmask etc. is changed accordingly
|
||||
ret float %elt
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}image_load_f32_v2i32:
|
||||
;CHECK: image_load {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 unorm
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
define amdgpu_ps float @image_load_f32_v2i32(<8 x i32> inreg %rsrc, <2 x i32> %c) {
|
||||
; GCN-LABEL: {{^}}image_load_f32_v2i32:
|
||||
; GCN: image_load {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 unorm
|
||||
; GCN: s_waitcnt vmcnt(0)
|
||||
define amdgpu_ps float @image_load_f32_v2i32(<8 x i32> inreg %rsrc, <2 x i32> %c) #0 {
|
||||
main_body:
|
||||
%tex = call float @llvm.amdgcn.image.load.f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 1, i1 0, i1 0, i1 0, i1 0)
|
||||
%tex = call float @llvm.amdgcn.image.load.f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false)
|
||||
ret float %tex
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}image_load_v2f32_v4i32:
|
||||
;CHECK: image_load {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 unorm
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
define amdgpu_ps <2 x float> @image_load_v2f32_v4i32(<8 x i32> inreg %rsrc, <4 x i32> %c) {
|
||||
; GCN-LABEL: {{^}}image_load_v2f32_v4i32:
|
||||
; GCN: image_load {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 unorm
|
||||
; GCN: s_waitcnt vmcnt(0)
|
||||
define amdgpu_ps <2 x float> @image_load_v2f32_v4i32(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 {
|
||||
main_body:
|
||||
%tex = call <2 x float> @llvm.amdgcn.image.load.v2f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 3, i1 0, i1 0, i1 0, i1 0)
|
||||
%tex = call <2 x float> @llvm.amdgcn.image.load.v2f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false)
|
||||
ret <2 x float> %tex
|
||||
}
|
||||
|
||||
|
||||
;CHECK-LABEL: {{^}}image_store_v4i32:
|
||||
;CHECK: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm
|
||||
define amdgpu_ps void @image_store_v4i32(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) {
|
||||
; GCN-LABEL: {{^}}image_store_v4i32:
|
||||
; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm
|
||||
define amdgpu_ps void @image_store_v4i32(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) #0 {
|
||||
main_body:
|
||||
call void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}image_store_v2i32:
|
||||
;CHECK: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm
|
||||
define amdgpu_ps void @image_store_v2i32(<8 x i32> inreg %rsrc, <4 x float> %data, <2 x i32> %coords) {
|
||||
; GCN-LABEL: {{^}}image_store_v2i32:
|
||||
; GCN: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm
|
||||
define amdgpu_ps void @image_store_v2i32(<8 x i32> inreg %rsrc, <4 x float> %data, <2 x i32> %coords) #0 {
|
||||
main_body:
|
||||
call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %data, <2 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %data, <2 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}image_store_i32:
|
||||
;CHECK: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
|
||||
define amdgpu_ps void @image_store_i32(<8 x i32> inreg %rsrc, <4 x float> %data, i32 %coords) {
|
||||
; GCN-LABEL: {{^}}image_store_i32:
|
||||
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
|
||||
define amdgpu_ps void @image_store_i32(<8 x i32> inreg %rsrc, <4 x float> %data, i32 %coords) #0 {
|
||||
main_body:
|
||||
call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %data, i32 %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %data, i32 %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}image_store_f32_i32:
|
||||
;CHECK: image_store {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 unorm
|
||||
define amdgpu_ps void @image_store_f32_i32(<8 x i32> inreg %rsrc, float %data, i32 %coords) {
|
||||
; GCN-LABEL: {{^}}image_store_f32_i32:
|
||||
; GCN: image_store {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 unorm
|
||||
define amdgpu_ps void @image_store_f32_i32(<8 x i32> inreg %rsrc, float %data, i32 %coords) #0 {
|
||||
main_body:
|
||||
call void @llvm.amdgcn.image.store.f32.i32.v8i32(float %data, i32 %coords, <8 x i32> %rsrc, i32 1, i1 0, i1 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.image.store.f32.i32.v8i32(float %data, i32 %coords, <8 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}image_store_v2f32_v4i32:
|
||||
;CHECK: image_store {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 unorm
|
||||
define amdgpu_ps void @image_store_v2f32_v4i32(<8 x i32> inreg %rsrc, <2 x float> %data, <4 x i32> %coords) {
|
||||
; GCN-LABEL: {{^}}image_store_v2f32_v4i32:
|
||||
; GCN: image_store {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 unorm
|
||||
define amdgpu_ps void @image_store_v2f32_v4i32(<8 x i32> inreg %rsrc, <2 x float> %data, <4 x i32> %coords) #0 {
|
||||
main_body:
|
||||
call void @llvm.amdgcn.image.store.v2f32.v4i32.v8i32(<2 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 3, i1 0, i1 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.image.store.v2f32.v4i32.v8i32(<2 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}image_store_mip:
|
||||
;CHECK: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm
|
||||
define amdgpu_ps void @image_store_mip(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) {
|
||||
; GCN-LABEL: {{^}}image_store_mip:
|
||||
; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm
|
||||
define amdgpu_ps void @image_store_mip(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) #0 {
|
||||
main_body:
|
||||
call void @llvm.amdgcn.image.store.mip.v4f32.v4i32.v8i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.image.store.mip.v4f32.v4i32.v8i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}getresinfo:
|
||||
;CHECK: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
|
||||
define amdgpu_ps void @getresinfo() {
|
||||
; GCN-LABEL: {{^}}getresinfo:
|
||||
; GCN: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
|
||||
define amdgpu_ps void @getresinfo() #0 {
|
||||
main_body:
|
||||
%r = call <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32 undef, <8 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0)
|
||||
%r = call <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32 undef, <8 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false)
|
||||
%r0 = extractelement <4 x float> %r, i32 0
|
||||
%r1 = extractelement <4 x float> %r, i32 1
|
||||
%r2 = extractelement <4 x float> %r, i32 2
|
||||
%r3 = extractelement <4 x float> %r, i32 3
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r0, float %r1, float %r2, float %r3, i1 true, i1 true) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
; Ideally, the register allocator would avoid the wait here
|
||||
;
|
||||
;CHECK-LABEL: {{^}}image_store_wait:
|
||||
;CHECK: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
|
||||
;CHECK: s_waitcnt vmcnt(0) expcnt(0)
|
||||
;CHECK: image_load v[0:3], v4, s[8:15] dmask:0xf unorm
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
;CHECK: image_store v[0:3], v4, s[16:23] dmask:0xf unorm
|
||||
define amdgpu_ps void @image_store_wait(<8 x i32> inreg, <8 x i32> inreg, <8 x i32> inreg, <4 x float>, i32) {
|
||||
; GCN-LABEL: {{^}}image_store_wait:
|
||||
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
|
||||
; GCN: s_waitcnt vmcnt(0) expcnt(0)
|
||||
; GCN: image_load v[0:3], v4, s[8:15] dmask:0xf unorm
|
||||
; GCN: s_waitcnt vmcnt(0)
|
||||
; GCN: image_store v[0:3], v4, s[16:23] dmask:0xf unorm
|
||||
define amdgpu_ps void @image_store_wait(<8 x i32> inreg %arg, <8 x i32> inreg %arg1, <8 x i32> inreg %arg2, <4 x float> %arg3, i32 %arg4) #0 {
|
||||
main_body:
|
||||
call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %3, i32 %4, <8 x i32> %0, i32 15, i1 0, i1 0, i1 0, i1 0)
|
||||
%data = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %4, <8 x i32> %1, i32 15, i1 0, i1 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %data, i32 %4, <8 x i32> %2, i32 15, i1 0, i1 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %arg3, i32 %arg4, <8 x i32> %arg, i32 15, i1 false, i1 false, i1 false, i1 false)
|
||||
%data = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %arg4, <8 x i32> %arg1, i32 15, i1 false, i1 false, i1 false, i1 false)
|
||||
call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %data, i32 %arg4, <8 x i32> %arg2, i32 15, i1 false, i1 false, i1 false, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -149,21 +147,22 @@ main_body:
|
||||
; VI-LABEL: image_load_mmo
|
||||
; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
|
||||
; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
|
||||
define amdgpu_ps void @image_load_mmo(float addrspace(3)* %lds, <2 x i32> %c, <8 x i32> inreg %rsrc) {
|
||||
store float 0.0, float addrspace(3)* %lds
|
||||
%tex = call float @llvm.amdgcn.image.load.f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
|
||||
define amdgpu_ps void @image_load_mmo(float addrspace(3)* %lds, <2 x i32> %c, <8 x i32> inreg %rsrc) #0 {
|
||||
bb:
|
||||
store float 0.000000e+00, float addrspace(3)* %lds
|
||||
%tex = call float @llvm.amdgcn.image.load.f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
|
||||
store float 0.0, float addrspace(3)* %tmp2
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tex, float %tex, float %tex, float %tex)
|
||||
store float 0.000000e+00, float addrspace(3)* %tmp2
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tex, float %tex, float %tex, float %tex, i1 true, i1 true) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
declare float @llvm.amdgcn.image.load.f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
|
||||
declare <2 x float> @llvm.amdgcn.image.load.v2f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
|
||||
declare void @llvm.amdgcn.image.store.f32.i32.v8i32(float, i32, <8 x i32>, i32, i1, i1, i1, i1) #0
|
||||
|
||||
|
||||
declare void @llvm.amdgcn.image.store.v2f32.v4i32.v8i32(<2 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0
|
||||
|
||||
|
||||
declare void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float>, i32, <8 x i32>, i32, i1, i1, i1, i1) #0
|
||||
declare void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float>, <2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0
|
||||
declare void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0
|
||||
@ -173,10 +172,9 @@ declare <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32, <8 x i32>, i32,
|
||||
declare <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.load.mip.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #1
|
||||
|
||||
declare <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #0
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readonly }
|
||||
|
@ -3,7 +3,6 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=kabini -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,16BANK %s
|
||||
; RUN: llc -march=amdgcn -mcpu=stoney -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,16BANK %s
|
||||
|
||||
|
||||
; GCN-LABEL: {{^}}v_interp:
|
||||
; GCN-NOT: s_wqm
|
||||
; GCN: s_mov_b32 m0, s{{[0-9]+}}
|
||||
@ -11,17 +10,17 @@
|
||||
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.y{{$}}
|
||||
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.y{{$}}
|
||||
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p0, attr0.x{{$}}
|
||||
define amdgpu_ps void @v_interp(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x float>) {
|
||||
define amdgpu_ps void @v_interp(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x float> %arg4) #0 {
|
||||
main_body:
|
||||
%i = extractelement <2 x float> %4, i32 0
|
||||
%j = extractelement <2 x float> %4, i32 1
|
||||
%p0_0 = call float @llvm.amdgcn.interp.p1(float %i, i32 0, i32 0, i32 %3)
|
||||
%p1_0 = call float @llvm.amdgcn.interp.p2(float %p0_0, float %j, i32 0, i32 0, i32 %3)
|
||||
%p0_1 = call float @llvm.amdgcn.interp.p1(float %i, i32 1, i32 0, i32 %3)
|
||||
%p1_1 = call float @llvm.amdgcn.interp.p2(float %p0_1, float %j, i32 1, i32 0, i32 %3)
|
||||
%const = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %3)
|
||||
%i = extractelement <2 x float> %arg4, i32 0
|
||||
%j = extractelement <2 x float> %arg4, i32 1
|
||||
%p0_0 = call float @llvm.amdgcn.interp.p1(float %i, i32 0, i32 0, i32 %arg3)
|
||||
%p1_0 = call float @llvm.amdgcn.interp.p2(float %p0_0, float %j, i32 0, i32 0, i32 %arg3)
|
||||
%p0_1 = call float @llvm.amdgcn.interp.p1(float %i, i32 1, i32 0, i32 %arg3)
|
||||
%p1_1 = call float @llvm.amdgcn.interp.p2(float %p0_1, float %j, i32 1, i32 0, i32 %arg3)
|
||||
%const = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %arg3)
|
||||
%w = fadd float %p1_1, %const
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %p0_0, float %p0_0, float %p1_1, float %w)
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_0, float %p0_0, float %p1_1, float %w, i1 true, i1 true) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -40,7 +39,8 @@ main_body:
|
||||
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr63.w{{$}}
|
||||
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr64.w{{$}}
|
||||
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr64.x{{$}}
|
||||
define amdgpu_ps void @v_interp_p1(float %i) {
|
||||
define amdgpu_ps void @v_interp_p1(float %i) #0 {
|
||||
bb:
|
||||
%p0_0 = call float @llvm.amdgcn.interp.p1(float %i, i32 0, i32 0, i32 256)
|
||||
%p0_1 = call float @llvm.amdgcn.interp.p1(float %i, i32 1, i32 0, i32 256)
|
||||
%p0_2 = call float @llvm.amdgcn.interp.p1(float %i, i32 2, i32 0, i32 256)
|
||||
@ -80,7 +80,8 @@ define amdgpu_ps void @v_interp_p1(float %i) {
|
||||
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr63.x{{$}}
|
||||
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr64.x{{$}}
|
||||
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr64.x{{$}}
|
||||
define amdgpu_ps void @v_interp_p2(float %x, float %j) {
|
||||
define amdgpu_ps void @v_interp_p2(float %x, float %j) #0 {
|
||||
bb:
|
||||
%p2_0 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 0, i32 0, i32 256)
|
||||
%p2_1 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 1, i32 0, i32 256)
|
||||
%p2_2 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 2, i32 0, i32 256)
|
||||
@ -121,7 +122,8 @@ define amdgpu_ps void @v_interp_p2(float %x, float %j) {
|
||||
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p10, attr64.y{{$}}
|
||||
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, invalid_param_3, attr64.y{{$}}
|
||||
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, invalid_param_10, attr64.x{{$}}
|
||||
define amdgpu_ps void @v_interp_mov(float %x, float %j) {
|
||||
define amdgpu_ps void @v_interp_mov(float %x, float %j) #0 {
|
||||
bb:
|
||||
%mov_0 = call float @llvm.amdgcn.interp.mov(i32 0, i32 0, i32 0, i32 256)
|
||||
%mov_1 = call float @llvm.amdgcn.interp.mov(i32 1, i32 0, i32 0, i32 256)
|
||||
%mov_2 = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 256)
|
||||
@ -164,12 +166,13 @@ define amdgpu_ps void @v_interp_mov(float %x, float %j) {
|
||||
; VI-DAG: v_interp_mov_f32 v{{[0-9]+}}, p0, attr0.x{{$}}
|
||||
; VI: s_mov_b32 m0, -1{{$}}
|
||||
; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
|
||||
define amdgpu_ps void @v_interp_readnone(float addrspace(3)* %lds) {
|
||||
store float 0.0, float addrspace(3)* %lds
|
||||
define amdgpu_ps void @v_interp_readnone(float addrspace(3)* %lds) #0 {
|
||||
bb:
|
||||
store float 0.000000e+00, float addrspace(3)* %lds
|
||||
%tmp1 = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 0)
|
||||
%tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
|
||||
store float 0.0, float addrspace(3)* %tmp2
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp1, float %tmp1, float %tmp1, float %tmp1)
|
||||
store float 0.000000e+00, float addrspace(3)* %tmp2
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -178,43 +181,44 @@ define amdgpu_ps void @v_interp_readnone(float addrspace(3)* %lds) {
|
||||
|
||||
; GCN-LABEL: {{^}}v_interp_p1_bank16_bug:
|
||||
; 16BANK-NOT: v_interp_p1_f32 [[DST:v[0-9]+]], [[DST]]
|
||||
define amdgpu_ps void @v_interp_p1_bank16_bug([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg13, [17 x <4 x i32>] addrspace(2)* byval %arg14, [34 x <8 x i32>] addrspace(2)* byval %arg15, float inreg %arg16, i32 inreg %arg17, <2 x i32> %arg18, <2 x i32> %arg19, <2 x i32> %arg20, <3 x i32> %arg21, <2 x i32> %arg22, <2 x i32> %arg23, <2 x i32> %arg24, float %arg25, float %arg26, float %arg27, float %arg28, float %arg29, float %arg30, i32 %arg31, float %arg32, float %arg33) {
|
||||
define amdgpu_ps void @v_interp_p1_bank16_bug([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg13, [17 x <4 x i32>] addrspace(2)* byval %arg14, [34 x <8 x i32>] addrspace(2)* byval %arg15, float inreg %arg16, i32 inreg %arg17, <2 x i32> %arg18, <2 x i32> %arg19, <2 x i32> %arg20, <3 x i32> %arg21, <2 x i32> %arg22, <2 x i32> %arg23, <2 x i32> %arg24, float %arg25, float %arg26, float %arg27, float %arg28, float %arg29, float %arg30, i32 %arg31, float %arg32, float %arg33) #0 {
|
||||
main_body:
|
||||
%i.i = extractelement <2 x i32> %arg19, i32 0
|
||||
%j.i = extractelement <2 x i32> %arg19, i32 1
|
||||
%i.f.i = bitcast i32 %i.i to float
|
||||
%j.f.i = bitcast i32 %j.i to float
|
||||
%p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg17) #1
|
||||
%p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg17) #1
|
||||
%p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg17) #0
|
||||
%p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg17) #0
|
||||
%i.i7 = extractelement <2 x i32> %arg19, i32 0
|
||||
%j.i8 = extractelement <2 x i32> %arg19, i32 1
|
||||
%i.f.i9 = bitcast i32 %i.i7 to float
|
||||
%j.f.i10 = bitcast i32 %j.i8 to float
|
||||
%p1.i11 = call float @llvm.amdgcn.interp.p1(float %i.f.i9, i32 1, i32 0, i32 %arg17) #1
|
||||
%p2.i12 = call float @llvm.amdgcn.interp.p2(float %p1.i11, float %j.f.i10, i32 1, i32 0, i32 %arg17) #1
|
||||
%p1.i11 = call float @llvm.amdgcn.interp.p1(float %i.f.i9, i32 1, i32 0, i32 %arg17) #0
|
||||
%p2.i12 = call float @llvm.amdgcn.interp.p2(float %p1.i11, float %j.f.i10, i32 1, i32 0, i32 %arg17) #0
|
||||
%i.i1 = extractelement <2 x i32> %arg19, i32 0
|
||||
%j.i2 = extractelement <2 x i32> %arg19, i32 1
|
||||
%i.f.i3 = bitcast i32 %i.i1 to float
|
||||
%j.f.i4 = bitcast i32 %j.i2 to float
|
||||
%p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 2, i32 0, i32 %arg17) #1
|
||||
%p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 2, i32 0, i32 %arg17) #1
|
||||
%p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 2, i32 0, i32 %arg17) #0
|
||||
%p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 2, i32 0, i32 %arg17) #0
|
||||
%tmp = call float @llvm.fabs.f32(float %p2.i)
|
||||
%tmp34 = call float @llvm.fabs.f32(float %p2.i12)
|
||||
%tmp35 = call float @llvm.fabs.f32(float %p2.i6)
|
||||
%tmp36 = call i32 @llvm.SI.packf16(float %tmp, float %tmp34)
|
||||
%tmp37 = bitcast i32 %tmp36 to float
|
||||
%tmp37 = bitcast i32 %tmp36 to <2 x half>
|
||||
%tmp38 = call i32 @llvm.SI.packf16(float %tmp35, float 1.000000e+00)
|
||||
%tmp39 = bitcast i32 %tmp38 to float
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp37, float %tmp39, float %tmp37, float %tmp39)
|
||||
%tmp39 = bitcast i32 %tmp38 to <2 x half>
|
||||
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp37, <2 x half> %tmp39, i1 true, i1 true) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
declare float @llvm.fabs.f32(float) #0
|
||||
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
|
||||
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
|
||||
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #0
|
||||
declare i32 @llvm.SI.packf16(float, float) #0
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
declare float @llvm.fabs.f32(float) #1
|
||||
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
|
||||
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
|
||||
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
|
||||
declare i32 @llvm.SI.packf16(float, float) #1
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { nounwind }
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
@ -1,24 +1,22 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
; GCN-LABEL: {{^}}mbcnt_intrinsics:
|
||||
; GCN: v_mbcnt_lo_u32_b32_e64 [[LO:v[0-9]+]], -1, 0
|
||||
; SI: v_mbcnt_hi_u32_b32_e32 {{v[0-9]+}}, -1, [[LO]]
|
||||
; VI: v_mbcnt_hi_u32_b32_e64 {{v[0-9]+}}, -1, [[LO]]
|
||||
|
||||
define amdgpu_ps void @mbcnt_intrinsics(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) {
|
||||
define amdgpu_ps void @mbcnt_intrinsics(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3) {
|
||||
main_body:
|
||||
%lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #1
|
||||
%hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo) #1
|
||||
%4 = bitcast i32 %hi to float
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %4, float %4, float %4, float %4)
|
||||
%lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
|
||||
%hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo) #0
|
||||
%tmp = bitcast i32 %hi to float
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp, float %tmp, float %tmp, float %tmp, i1 true, i1 true) #1
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
|
||||
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
|
||||
|
||||
declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
|
||||
attributes #1 = { nounwind readnone }
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { nounwind }
|
||||
|
@ -1,15 +0,0 @@
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s
|
||||
|
||||
;CHECK: s_lshl_b32 s{{[0-9]}}, s{{[0-9]}}, 1
|
||||
|
||||
define void @test(i32 %p) {
|
||||
%i = mul i32 %p, 2
|
||||
%r = bitcast i32 %i to float
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
@ -1,15 +0,0 @@
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s
|
||||
|
||||
;CHECK: s_lshr_b32 s{{[0-9]}}, s{{[0-9]}}, 1
|
||||
|
||||
define void @test(i32 %p) {
|
||||
%i = udiv i32 %p, 2
|
||||
%r = bitcast i32 %i to float
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
@ -1,17 +0,0 @@
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
|
||||
|
||||
;CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0xaaaaaaab
|
||||
;CHECK: v_mul_hi_u32 v0, {{v[0-9]+}}, {{s[0-9]+}}
|
||||
;CHECK-NEXT: v_lshrrev_b32_e32 v0, 1, v0
|
||||
|
||||
define void @test(i32 %p) {
|
||||
%i = udiv i32 %p, 3
|
||||
%r = bitcast i32 %i to float
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
@ -1,25 +1,24 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
|
||||
; GCN-LABEL: {{^}}vgpr:
|
||||
; GCN: v_mov_b32_e32 v1, v0
|
||||
; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
|
||||
; GCN-DAG: exp mrt0 v1, v1, v1, v1 done compr vm
|
||||
; GCN-DAG: exp mrt0 v1, v1, v1, v1 done vm
|
||||
; GCN: s_waitcnt expcnt(0)
|
||||
; GCN-NOT: s_endpgm
|
||||
define amdgpu_vs {float, float} @vgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
|
||||
%x = fadd float %3, 1.0
|
||||
%a = insertvalue {float, float} undef, float %x, 0
|
||||
%b = insertvalue {float, float} %a, float %3, 1
|
||||
ret {float, float} %b
|
||||
define amdgpu_vs { float, float } @vgpr([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
|
||||
bb:
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
|
||||
%x = fadd float %arg3, 1.000000e+00
|
||||
%a = insertvalue { float, float } undef, float %x, 0
|
||||
%b = insertvalue { float, float } %a, float %arg3, 1
|
||||
ret { float, float } %b
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}vgpr_literal:
|
||||
; GCN: v_mov_b32_e32 v4, v0
|
||||
; GCN: exp mrt0 v4, v4, v4, v4 done compr vm
|
||||
; GCN: exp mrt0 v4, v4, v4, v4 done vm
|
||||
|
||||
; GCN-DAG: v_mov_b32_e32 v0, 1.0
|
||||
; GCN-DAG: v_mov_b32_e32 v1, 2.0
|
||||
@ -27,12 +26,12 @@ define amdgpu_vs {float, float} @vgpr([9 x <16 x i8>] addrspace(2)* byval, i32 i
|
||||
; GCN-DAG: v_mov_b32_e32 v3, -1.0
|
||||
; GCN: s_waitcnt expcnt(0)
|
||||
; GCN-NOT: s_endpgm
|
||||
define amdgpu_vs {float, float, float, float} @vgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
|
||||
ret {float, float, float, float} {float 1.0, float 2.0, float 4.0, float -1.0}
|
||||
define amdgpu_vs { float, float, float, float } @vgpr_literal([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
|
||||
bb:
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
|
||||
ret { float, float, float, float } { float 1.000000e+00, float 2.000000e+00, float 4.000000e+00, float -1.000000e+00 }
|
||||
}
|
||||
|
||||
|
||||
; GCN: .long 165580
|
||||
; GCN-NEXT: .long 562
|
||||
; GCN-NEXT: .long 165584
|
||||
@ -44,24 +43,24 @@ define amdgpu_vs {float, float, float, float} @vgpr_literal([9 x <16 x i8>] addr
|
||||
; GCN: v_mov_b32_e32 v3, v4
|
||||
; GCN: v_mov_b32_e32 v4, v6
|
||||
; GCN-NOT: s_endpgm
|
||||
define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr0([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
|
||||
%i0 = extractelement <2 x i32> %4, i32 0
|
||||
%i1 = extractelement <2 x i32> %4, i32 1
|
||||
%i2 = extractelement <2 x i32> %7, i32 0
|
||||
%i3 = extractelement <2 x i32> %8, i32 0
|
||||
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr0([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
|
||||
bb:
|
||||
%i0 = extractelement <2 x i32> %arg4, i32 0
|
||||
%i1 = extractelement <2 x i32> %arg4, i32 1
|
||||
%i2 = extractelement <2 x i32> %arg7, i32 0
|
||||
%i3 = extractelement <2 x i32> %arg8, i32 0
|
||||
%f0 = bitcast i32 %i0 to float
|
||||
%f1 = bitcast i32 %i1 to float
|
||||
%f2 = bitcast i32 %i2 to float
|
||||
%f3 = bitcast i32 %i3 to float
|
||||
%r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
|
||||
%r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
|
||||
%r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
|
||||
%r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
|
||||
%r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
|
||||
ret {float, float, float, float, float} %r4
|
||||
%r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
|
||||
%r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
|
||||
%r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
|
||||
%r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
|
||||
%r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
|
||||
ret { float, float, float, float, float } %r4
|
||||
}
|
||||
|
||||
|
||||
; GCN: .long 165580
|
||||
; GCN-NEXT: .long 1
|
||||
; GCN-NEXT: .long 165584
|
||||
@ -69,11 +68,11 @@ define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr0([9 x <16 x i
|
||||
; GCN-LABEL: {{^}}ps_input_ena_no_inputs:
|
||||
; GCN: v_mov_b32_e32 v0, 1.0
|
||||
; GCN-NOT: s_endpgm
|
||||
define amdgpu_ps float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
|
||||
ret float 1.0
|
||||
define amdgpu_ps float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
|
||||
bb:
|
||||
ret float 1.000000e+00
|
||||
}
|
||||
|
||||
|
||||
; GCN: .long 165580
|
||||
; GCN-NEXT: .long 2081
|
||||
; GCN-NEXT: .long 165584
|
||||
@ -83,14 +82,14 @@ define amdgpu_ps float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(2)* byv
|
||||
; GCN-DAG: v_mov_b32_e32 v1, v2
|
||||
; GCN: v_mov_b32_e32 v2, v3
|
||||
; GCN-NOT: s_endpgm
|
||||
define amdgpu_ps {float, <2 x float>} @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
|
||||
%f = bitcast <2 x i32> %8 to <2 x float>
|
||||
%s = insertvalue {float, <2 x float>} undef, float %14, 0
|
||||
%s1 = insertvalue {float, <2 x float>} %s, <2 x float> %f, 1
|
||||
ret {float, <2 x float>} %s1
|
||||
define amdgpu_ps { float, <2 x float> } @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
|
||||
bb:
|
||||
%f = bitcast <2 x i32> %arg8 to <2 x float>
|
||||
%s = insertvalue { float, <2 x float> } undef, float %arg14, 0
|
||||
%s1 = insertvalue { float, <2 x float> } %s, <2 x float> %f, 1
|
||||
ret { float, <2 x float> } %s1
|
||||
}
|
||||
|
||||
|
||||
; GCN: .long 165580
|
||||
; GCN-NEXT: .long 562
|
||||
; GCN-NEXT: .long 165584
|
||||
@ -102,25 +101,24 @@ define amdgpu_ps {float, <2 x float>} @ps_input_ena_pos_w([9 x <16 x i8>] addrsp
|
||||
; GCN-DAG: v_mov_b32_e32 v3, v6
|
||||
; GCN-DAG: v_mov_b32_e32 v4, v8
|
||||
; GCN-NOT: s_endpgm
|
||||
attributes #1 = { "InitialPSInputAddr"="1" }
|
||||
define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr1([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #1 {
|
||||
%i0 = extractelement <2 x i32> %4, i32 0
|
||||
%i1 = extractelement <2 x i32> %4, i32 1
|
||||
%i2 = extractelement <2 x i32> %7, i32 0
|
||||
%i3 = extractelement <2 x i32> %8, i32 0
|
||||
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr1([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #2 {
|
||||
bb:
|
||||
%i0 = extractelement <2 x i32> %arg4, i32 0
|
||||
%i1 = extractelement <2 x i32> %arg4, i32 1
|
||||
%i2 = extractelement <2 x i32> %arg7, i32 0
|
||||
%i3 = extractelement <2 x i32> %arg8, i32 0
|
||||
%f0 = bitcast i32 %i0 to float
|
||||
%f1 = bitcast i32 %i1 to float
|
||||
%f2 = bitcast i32 %i2 to float
|
||||
%f3 = bitcast i32 %i3 to float
|
||||
%r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
|
||||
%r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
|
||||
%r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
|
||||
%r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
|
||||
%r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
|
||||
ret {float, float, float, float, float} %r4
|
||||
%r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
|
||||
%r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
|
||||
%r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
|
||||
%r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
|
||||
%r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
|
||||
ret { float, float, float, float, float } %r4
|
||||
}
|
||||
|
||||
|
||||
; GCN: .long 165580
|
||||
; GCN-NEXT: .long 562
|
||||
; GCN-NEXT: .long 165584
|
||||
@ -132,25 +130,24 @@ define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr1([9 x <16 x i
|
||||
; GCN: v_mov_b32_e32 v3, v8
|
||||
; GCN: v_mov_b32_e32 v4, v12
|
||||
; GCN-NOT: s_endpgm
|
||||
attributes #2 = { "InitialPSInputAddr"="119" }
|
||||
define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr119([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #2 {
|
||||
%i0 = extractelement <2 x i32> %4, i32 0
|
||||
%i1 = extractelement <2 x i32> %4, i32 1
|
||||
%i2 = extractelement <2 x i32> %7, i32 0
|
||||
%i3 = extractelement <2 x i32> %8, i32 0
|
||||
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr119([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #3 {
|
||||
bb:
|
||||
%i0 = extractelement <2 x i32> %arg4, i32 0
|
||||
%i1 = extractelement <2 x i32> %arg4, i32 1
|
||||
%i2 = extractelement <2 x i32> %arg7, i32 0
|
||||
%i3 = extractelement <2 x i32> %arg8, i32 0
|
||||
%f0 = bitcast i32 %i0 to float
|
||||
%f1 = bitcast i32 %i1 to float
|
||||
%f2 = bitcast i32 %i2 to float
|
||||
%f3 = bitcast i32 %i3 to float
|
||||
%r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
|
||||
%r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
|
||||
%r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
|
||||
%r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
|
||||
%r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
|
||||
ret {float, float, float, float, float} %r4
|
||||
%r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
|
||||
%r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
|
||||
%r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
|
||||
%r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
|
||||
%r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
|
||||
ret { float, float, float, float, float } %r4
|
||||
}
|
||||
|
||||
|
||||
; GCN: .long 165580
|
||||
; GCN-NEXT: .long 562
|
||||
; GCN-NEXT: .long 165584
|
||||
@ -162,38 +159,37 @@ define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr119([9 x <16 x
|
||||
; GCN: v_mov_b32_e32 v3, v4
|
||||
; GCN: v_mov_b32_e32 v4, v8
|
||||
; GCN-NOT: s_endpgm
|
||||
attributes #3 = { "InitialPSInputAddr"="418" }
|
||||
define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr418([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #3 {
|
||||
%i0 = extractelement <2 x i32> %4, i32 0
|
||||
%i1 = extractelement <2 x i32> %4, i32 1
|
||||
%i2 = extractelement <2 x i32> %7, i32 0
|
||||
%i3 = extractelement <2 x i32> %8, i32 0
|
||||
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr418([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #4 {
|
||||
bb:
|
||||
%i0 = extractelement <2 x i32> %arg4, i32 0
|
||||
%i1 = extractelement <2 x i32> %arg4, i32 1
|
||||
%i2 = extractelement <2 x i32> %arg7, i32 0
|
||||
%i3 = extractelement <2 x i32> %arg8, i32 0
|
||||
%f0 = bitcast i32 %i0 to float
|
||||
%f1 = bitcast i32 %i1 to float
|
||||
%f2 = bitcast i32 %i2 to float
|
||||
%f3 = bitcast i32 %i3 to float
|
||||
%r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
|
||||
%r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
|
||||
%r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
|
||||
%r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
|
||||
%r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
|
||||
ret {float, float, float, float, float} %r4
|
||||
%r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
|
||||
%r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
|
||||
%r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
|
||||
%r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
|
||||
%r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
|
||||
ret { float, float, float, float, float } %r4
|
||||
}
|
||||
|
||||
|
||||
; GCN-LABEL: {{^}}sgpr:
|
||||
; GCN: s_add_i32 s0, s3, 2
|
||||
; GCN: s_mov_b32 s2, s3
|
||||
; GCN-NOT: s_endpgm
|
||||
define amdgpu_vs {i32, i32, i32} @sgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
|
||||
%x = add i32 %2, 2
|
||||
%a = insertvalue {i32, i32, i32} undef, i32 %x, 0
|
||||
%b = insertvalue {i32, i32, i32} %a, i32 %1, 1
|
||||
%c = insertvalue {i32, i32, i32} %a, i32 %2, 2
|
||||
ret {i32, i32, i32} %c
|
||||
define amdgpu_vs { i32, i32, i32 } @sgpr([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
|
||||
bb:
|
||||
%x = add i32 %arg2, 2
|
||||
%a = insertvalue { i32, i32, i32 } undef, i32 %x, 0
|
||||
%b = insertvalue { i32, i32, i32 } %a, i32 %arg1, 1
|
||||
%c = insertvalue { i32, i32, i32 } %a, i32 %arg2, 2
|
||||
ret { i32, i32, i32 } %c
|
||||
}
|
||||
|
||||
|
||||
; GCN-LABEL: {{^}}sgpr_literal:
|
||||
; GCN: s_mov_b32 s0, 5
|
||||
; GCN-NOT: s_mov_b32 s0, s0
|
||||
@ -201,37 +197,37 @@ define amdgpu_vs {i32, i32, i32} @sgpr([9 x <16 x i8>] addrspace(2)* byval, i32
|
||||
; GCN-DAG: s_mov_b32 s2, 7
|
||||
; GCN-DAG: s_mov_b32 s3, 8
|
||||
; GCN-NOT: s_endpgm
|
||||
define amdgpu_vs {i32, i32, i32, i32} @sgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
|
||||
%x = add i32 %2, 2
|
||||
ret {i32, i32, i32, i32} {i32 5, i32 6, i32 7, i32 8}
|
||||
define amdgpu_vs { i32, i32, i32, i32 } @sgpr_literal([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
|
||||
bb:
|
||||
%x = add i32 %arg2, 2
|
||||
ret { i32, i32, i32, i32 } { i32 5, i32 6, i32 7, i32 8 }
|
||||
}
|
||||
|
||||
|
||||
; GCN-LABEL: {{^}}both:
|
||||
; GCN: v_mov_b32_e32 v1, v0
|
||||
; GCN-DAG: exp mrt0 v1, v1, v1, v1 done compr vm
|
||||
; GCN-DAG: exp mrt0 v1, v1, v1, v1 done vm
|
||||
; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
|
||||
; GCN-DAG: s_add_i32 s0, s3, 2
|
||||
; GCN-DAG: s_mov_b32 s1, s2
|
||||
; GCN: s_mov_b32 s2, s3
|
||||
; GCN: s_waitcnt expcnt(0)
|
||||
; GCN-NOT: s_endpgm
|
||||
define amdgpu_vs {float, i32, float, i32, i32} @both([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
|
||||
%v = fadd float %3, 1.0
|
||||
%s = add i32 %2, 2
|
||||
%a0 = insertvalue {float, i32, float, i32, i32} undef, float %v, 0
|
||||
%a1 = insertvalue {float, i32, float, i32, i32} %a0, i32 %s, 1
|
||||
%a2 = insertvalue {float, i32, float, i32, i32} %a1, float %3, 2
|
||||
%a3 = insertvalue {float, i32, float, i32, i32} %a2, i32 %1, 3
|
||||
%a4 = insertvalue {float, i32, float, i32, i32} %a3, i32 %2, 4
|
||||
ret {float, i32, float, i32, i32} %a4
|
||||
define amdgpu_vs { float, i32, float, i32, i32 } @both([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
|
||||
bb:
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
|
||||
%v = fadd float %arg3, 1.000000e+00
|
||||
%s = add i32 %arg2, 2
|
||||
%a0 = insertvalue { float, i32, float, i32, i32 } undef, float %v, 0
|
||||
%a1 = insertvalue { float, i32, float, i32, i32 } %a0, i32 %s, 1
|
||||
%a2 = insertvalue { float, i32, float, i32, i32 } %a1, float %arg3, 2
|
||||
%a3 = insertvalue { float, i32, float, i32, i32 } %a2, i32 %arg1, 3
|
||||
%a4 = insertvalue { float, i32, float, i32, i32 } %a3, i32 %arg2, 4
|
||||
ret { float, i32, float, i32, i32 } %a4
|
||||
}
|
||||
|
||||
|
||||
; GCN-LABEL: {{^}}structure_literal:
|
||||
; GCN: v_mov_b32_e32 v3, v0
|
||||
; GCN: exp mrt0 v3, v3, v3, v3 done compr vm
|
||||
; GCN: exp mrt0 v3, v3, v3, v3 done vm
|
||||
|
||||
; GCN-DAG: v_mov_b32_e32 v0, 1.0
|
||||
; GCN-DAG: s_mov_b32 s0, 2
|
||||
@ -239,9 +235,16 @@ define amdgpu_vs {float, i32, float, i32, i32} @both([9 x <16 x i8>] addrspace(2
|
||||
; GCN-DAG: v_mov_b32_e32 v1, 2.0
|
||||
; GCN-DAG: v_mov_b32_e32 v2, 4.0
|
||||
; GCN: s_waitcnt expcnt(0)
|
||||
define amdgpu_vs {{float, i32}, {i32, <2 x float>}} @structure_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
|
||||
ret {{float, i32}, {i32, <2 x float>}} {{float, i32} {float 1.0, i32 2}, {i32, <2 x float>} {i32 3, <2 x float> <float 2.0, float 4.0>}}
|
||||
define amdgpu_vs { { float, i32 }, { i32, <2 x float> } } @structure_literal([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
|
||||
bb:
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
|
||||
ret { { float, i32 }, { i32, <2 x float> } } { { float, i32 } { float 1.000000e+00, i32 2 }, { i32, <2 x float> } { i32 3, <2 x float> <float 2.000000e+00, float 4.000000e+00> } }
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind "InitialPSInputAddr"="0" }
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind "InitialPSInputAddr"="0" }
|
||||
attributes #2 = { nounwind "InitialPSInputAddr"="1" }
|
||||
attributes #3 = { nounwind "InitialPSInputAddr"="119" }
|
||||
attributes #4 = { nounwind "InitialPSInputAddr"="418" }
|
||||
|
@ -4,12 +4,9 @@
|
||||
; CHECK-LABEL: {{^}}main:
|
||||
; CHECK: v_cmp_o_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
|
||||
define void @main(float %p) {
|
||||
define amdgpu_ps float @main(float inreg %p) {
|
||||
main_body:
|
||||
%c = fcmp oeq float %p, %p
|
||||
%r = select i1 %c, float 1.000000e+00, float 0.000000e+00
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %r, float %r, float %r, float %r)
|
||||
ret void
|
||||
ret float %r
|
||||
}
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
|
@ -4,12 +4,9 @@
|
||||
; CHECK-LABEL: {{^}}main:
|
||||
; CHECK: v_cmp_u_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
|
||||
define void @main(float %p) {
|
||||
define amdgpu_ps float @main(float inreg %p) {
|
||||
main_body:
|
||||
%c = fcmp une float %p, %p
|
||||
%r = select i1 %c, float 1.000000e+00, float 0.000000e+00
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %r, float %r, float %r, float %r)
|
||||
ret void
|
||||
ret float %r
|
||||
}
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
|
@ -1,13 +1,10 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
|
||||
|
||||
; CHECK-LABEL: {{^}}phi1:
|
||||
; CHECK: s_buffer_load_dword [[DST:s[0-9]]], {{s\[[0-9]+:[0-9]+\]}}, 0x0
|
||||
; CHECK: v_mov_b32_e32 v{{[0-9]}}, [[DST]]
|
||||
define amdgpu_ps void @phi1(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #1 {
|
||||
define amdgpu_ps void @phi1(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
main_body:
|
||||
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
|
||||
@ -25,13 +22,13 @@ ELSE: ; preds = %main_body
|
||||
ENDIF: ; preds = %ELSE, %main_body
|
||||
%temp.0 = phi float [ %tmp26, %ELSE ], [ %tmp21, %main_body ]
|
||||
%tmp27 = fadd float %temp.0, %tmp23
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %tmp27, float %tmp27, float 0.000000e+00, float 1.000000e+00)
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp27, float %tmp27, float 0.000000e+00, float 1.000000e+00, i1 true, i1 true) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
; Make sure this program doesn't crash
|
||||
; CHECK-LABEL: {{^}}phi2:
|
||||
define amdgpu_ps void @phi2(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
define amdgpu_ps void @phi2(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #1 {
|
||||
main_body:
|
||||
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
|
||||
@ -58,32 +55,32 @@ main_body:
|
||||
%j.i = extractelement <2 x i32> %arg5, i32 1
|
||||
%i.f.i = bitcast i32 %i.i to float
|
||||
%j.f.i = bitcast i32 %j.i to float
|
||||
%p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg3) #0
|
||||
%p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg3) #0
|
||||
%p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg3) #1
|
||||
%p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg3) #1
|
||||
%i.i19 = extractelement <2 x i32> %arg5, i32 0
|
||||
%j.i20 = extractelement <2 x i32> %arg5, i32 1
|
||||
%i.f.i21 = bitcast i32 %i.i19 to float
|
||||
%j.f.i22 = bitcast i32 %j.i20 to float
|
||||
%p1.i23 = call float @llvm.amdgcn.interp.p1(float %i.f.i21, i32 1, i32 0, i32 %arg3) #0
|
||||
%p2.i24 = call float @llvm.amdgcn.interp.p2(float %p1.i23, float %j.f.i22, i32 1, i32 0, i32 %arg3) #0
|
||||
%p1.i23 = call float @llvm.amdgcn.interp.p1(float %i.f.i21, i32 1, i32 0, i32 %arg3) #1
|
||||
%p2.i24 = call float @llvm.amdgcn.interp.p2(float %p1.i23, float %j.f.i22, i32 1, i32 0, i32 %arg3) #1
|
||||
%i.i13 = extractelement <2 x i32> %arg5, i32 0
|
||||
%j.i14 = extractelement <2 x i32> %arg5, i32 1
|
||||
%i.f.i15 = bitcast i32 %i.i13 to float
|
||||
%j.f.i16 = bitcast i32 %j.i14 to float
|
||||
%p1.i17 = call float @llvm.amdgcn.interp.p1(float %i.f.i15, i32 0, i32 1, i32 %arg3) #0
|
||||
%p2.i18 = call float @llvm.amdgcn.interp.p2(float %p1.i17, float %j.f.i16, i32 0, i32 1, i32 %arg3) #0
|
||||
%p1.i17 = call float @llvm.amdgcn.interp.p1(float %i.f.i15, i32 0, i32 1, i32 %arg3) #1
|
||||
%p2.i18 = call float @llvm.amdgcn.interp.p2(float %p1.i17, float %j.f.i16, i32 0, i32 1, i32 %arg3) #1
|
||||
%i.i7 = extractelement <2 x i32> %arg5, i32 0
|
||||
%j.i8 = extractelement <2 x i32> %arg5, i32 1
|
||||
%i.f.i9 = bitcast i32 %i.i7 to float
|
||||
%j.f.i10 = bitcast i32 %j.i8 to float
|
||||
%p1.i11 = call float @llvm.amdgcn.interp.p1(float %i.f.i9, i32 1, i32 1, i32 %arg3) #0
|
||||
%p2.i12 = call float @llvm.amdgcn.interp.p2(float %p1.i11, float %j.f.i10, i32 1, i32 1, i32 %arg3) #0
|
||||
%p1.i11 = call float @llvm.amdgcn.interp.p1(float %i.f.i9, i32 1, i32 1, i32 %arg3) #1
|
||||
%p2.i12 = call float @llvm.amdgcn.interp.p2(float %p1.i11, float %j.f.i10, i32 1, i32 1, i32 %arg3) #1
|
||||
%i.i1 = extractelement <2 x i32> %arg5, i32 0
|
||||
%j.i2 = extractelement <2 x i32> %arg5, i32 1
|
||||
%i.f.i3 = bitcast i32 %i.i1 to float
|
||||
%j.f.i4 = bitcast i32 %j.i2 to float
|
||||
%p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 2, i32 1, i32 %arg3) #0
|
||||
%p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 2, i32 1, i32 %arg3) #0
|
||||
%p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 2, i32 1, i32 %arg3) #1
|
||||
%p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 2, i32 1, i32 %arg3) #1
|
||||
%tmp45 = bitcast float %p2.i to i32
|
||||
%tmp46 = bitcast float %p2.i24 to i32
|
||||
%tmp47 = insertelement <2 x i32> undef, i32 %tmp45, i32 0
|
||||
@ -168,16 +165,16 @@ ENDIF24: ; preds = %IF25, %ENDIF
|
||||
%tmp111 = fsub float -0.000000e+00, %tmp105
|
||||
%tmp112 = fmul float %tmp111, %tmp106
|
||||
%tmp113 = call i32 @llvm.SI.packf16(float %tmp108, float %tmp110)
|
||||
%tmp114 = bitcast i32 %tmp113 to float
|
||||
%tmp114 = bitcast i32 %tmp113 to <2 x half>
|
||||
%tmp115 = call i32 @llvm.SI.packf16(float %tmp112, float 1.000000e+00)
|
||||
%tmp116 = bitcast i32 %tmp115 to float
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp114, float %tmp116, float %tmp114, float %tmp116)
|
||||
%tmp116 = bitcast i32 %tmp115 to <2 x half>
|
||||
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp114, <2 x half> %tmp116, i1 true, i1 true) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
; We just want to make sure the program doesn't crash
|
||||
; CHECK-LABEL: {{^}}loop:
|
||||
define amdgpu_ps void @loop(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #1 {
|
||||
define amdgpu_ps void @loop(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
main_body:
|
||||
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
|
||||
@ -204,7 +201,7 @@ LOOP: ; preds = %ENDIF, %main_body
|
||||
br i1 %tmp33, label %IF, label %ENDIF
|
||||
|
||||
IF: ; preds = %LOOP
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %temp4.0, float %temp5.0, float %temp6.0, float 1.000000e+00)
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %temp4.0, float %temp5.0, float %temp6.0, float 1.000000e+00, i1 true, i1 true) #0
|
||||
ret void
|
||||
|
||||
ENDIF: ; preds = %LOOP
|
||||
@ -230,7 +227,7 @@ ENDIF: ; preds = %LOOP
|
||||
; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[SAMPLE_LO]]:[[SAMPLE_HI]]{{\]}}
|
||||
; CHECK: exp
|
||||
; CHECK: s_endpgm
|
||||
define amdgpu_ps void @sample_v3([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #1 {
|
||||
define amdgpu_ps void @sample_v3([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
|
||||
entry:
|
||||
%tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
|
||||
%tmp21 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
|
||||
@ -261,7 +258,7 @@ endif: ; preds = %else, %if
|
||||
%val.0 = phi float [ %val.if.0, %if ], [ %val.else.0, %else ]
|
||||
%val.1 = phi float [ %val.if.1, %if ], [ %val.else.1, %else ]
|
||||
%val.2 = phi float [ %val.if.2, %if ], [ %val.else.2, %else ]
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %val.0, float %val.1, float %val.2, float 0.000000e+00)
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %val.0, float %val.1, float %val.2, float 0.000000e+00, i1 true, i1 true) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -294,7 +291,7 @@ endif: ; preds = %if1, %if0, %entry
|
||||
; This test is just checking that we don't crash / assertion fail.
|
||||
; CHECK-LABEL: {{^}}copy2:
|
||||
; CHECK: s_endpgm
|
||||
define amdgpu_ps void @copy2([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #1 {
|
||||
define amdgpu_ps void @copy2([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
|
||||
entry:
|
||||
br label %LOOP68
|
||||
|
||||
@ -308,7 +305,7 @@ LOOP68: ; preds = %ENDIF69, %entry
|
||||
IF70: ; preds = %LOOP68
|
||||
%q = icmp ne i32 %l, 13
|
||||
%temp.8 = select i1 %q, float 1.000000e+00, float 0.000000e+00
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %temp.8, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %temp.8, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, i1 true, i1 true) #0
|
||||
ret void
|
||||
|
||||
ENDIF69: ; preds = %LOOP68
|
||||
@ -330,7 +327,7 @@ ENDIF69: ; preds = %LOOP68
|
||||
; [[END]]:
|
||||
; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[ADD]]{{\]}}
|
||||
; CHECK: s_endpgm
|
||||
define amdgpu_ps void @sample_rsrc([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #1 {
|
||||
define amdgpu_ps void @sample_rsrc([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
|
||||
bb:
|
||||
%tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg1, i32 0, i32 0
|
||||
%tmp22 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !3
|
||||
@ -343,14 +340,14 @@ bb:
|
||||
%j.i = extractelement <2 x i32> %arg7, i32 1
|
||||
%i.f.i = bitcast i32 %i.i to float
|
||||
%j.f.i = bitcast i32 %j.i to float
|
||||
%p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg5) #1
|
||||
%p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg5) #1
|
||||
%p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg5) #0
|
||||
%p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg5) #0
|
||||
%i.i1 = extractelement <2 x i32> %arg7, i32 0
|
||||
%j.i2 = extractelement <2 x i32> %arg7, i32 1
|
||||
%i.f.i3 = bitcast i32 %i.i1 to float
|
||||
%j.f.i4 = bitcast i32 %j.i2 to float
|
||||
%p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 1, i32 0, i32 %arg5) #1
|
||||
%p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 1, i32 0, i32 %arg5) #1
|
||||
%p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 1, i32 0, i32 %arg5) #0
|
||||
%p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 1, i32 0, i32 %arg5) #0
|
||||
%tmp31 = bitcast float %tmp23 to i32
|
||||
%tmp36 = icmp ne i32 %tmp31, 0
|
||||
br i1 %tmp36, label %bb38, label %bb80
|
||||
@ -377,80 +374,58 @@ bb80: ; preds = %bb
|
||||
bb71: ; preds = %bb80, %bb38
|
||||
%tmp72 = phi <4 x float> [ %tmp58, %bb38 ], [ %tmp87, %bb80 ]
|
||||
%tmp88 = extractelement <4 x float> %tmp72, i32 0
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp88, float %tmp88, float %tmp88, float %tmp88)
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp88, float %tmp88, float %tmp88, float %tmp88, i1 true, i1 true) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check that the resource descriptor is stored in an sgpr.
|
||||
; CHECK-LABEL: {{^}}mimg_srsrc_sgpr:
|
||||
; CHECK: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
|
||||
define amdgpu_ps void @mimg_srsrc_sgpr([34 x <8 x i32>] addrspace(2)* byval %arg) #1 {
|
||||
define amdgpu_ps void @mimg_srsrc_sgpr([34 x <8 x i32>] addrspace(2)* byval %arg) #0 {
|
||||
bb:
|
||||
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #1
|
||||
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
|
||||
%tmp7 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %arg, i32 0, i32 %tid
|
||||
%tmp8 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp7, align 32, !tbaa !0
|
||||
%tmp9 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 1061158912, i32 1048576000>, <8 x i32> %tmp8, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
|
||||
%tmp10 = extractelement <4 x float> %tmp9, i32 0
|
||||
%tmp12 = call i32 @llvm.SI.packf16(float undef, float %tmp10)
|
||||
%tmp13 = bitcast i32 %tmp12 to float
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp13, float undef, float undef, float undef)
|
||||
%tmp13 = bitcast i32 %tmp12 to <2 x half>
|
||||
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp13, <2 x half> undef, i1 true, i1 true) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check that the sampler is stored in an sgpr.
|
||||
; CHECK-LABEL: {{^}}mimg_ssamp_sgpr:
|
||||
; CHECK: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
|
||||
define amdgpu_ps void @mimg_ssamp_sgpr([17 x <4 x i32>] addrspace(2)* byval %arg) #1 {
|
||||
define amdgpu_ps void @mimg_ssamp_sgpr([17 x <4 x i32>] addrspace(2)* byval %arg) #0 {
|
||||
bb:
|
||||
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #1
|
||||
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
|
||||
%tmp7 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg, i32 0, i32 %tid
|
||||
%tmp8 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp7, align 16, !tbaa !0
|
||||
%tmp9 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 1061158912, i32 1048576000>, <8 x i32> undef, <4 x i32> %tmp8, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
|
||||
%tmp10 = extractelement <4 x float> %tmp9, i32 0
|
||||
%tmp12 = call i32 @llvm.SI.packf16(float %tmp10, float undef)
|
||||
%tmp13 = bitcast i32 %tmp12 to float
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp13, float undef, float undef, float undef)
|
||||
%tmp13 = bitcast i32 %tmp12 to <2 x half>
|
||||
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp13, <2 x half> undef, i1 true, i1 true) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare float @llvm.SI.load.const(<16 x i8>, i32) #0
|
||||
declare float @llvm.fabs.f32(float) #1
|
||||
declare float @llvm.amdgcn.rsq.f32(float) #1
|
||||
declare float @llvm.exp2.f32(float) #1
|
||||
declare float @llvm.pow.f32(float, float) #1
|
||||
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
|
||||
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
|
||||
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare float @llvm.fabs.f32(float) #0
|
||||
declare i32 @llvm.SI.packf16(float, float) #1
|
||||
declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
|
||||
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <8 x i32>, <16 x i8>, i32) #0
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare float @llvm.amdgcn.rsq.f32(float) #0
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare float @llvm.exp2.f32(float) #0
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare float @llvm.pow.f32(float, float) #0
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.SI.packf16(float, float) #0
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #0
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { nounwind }
|
||||
attributes #2 = { nounwind readonly }
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
||||
!0 = !{!1, !1, i64 0, i32 1}
|
||||
!1 = !{!"const", !2}
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||
; XUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
|
||||
; XUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #0
|
||||
|
||||
@ -466,4 +466,12 @@ define void @s_shl_inline_high_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 a
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}test_mul2:
|
||||
; GCN: s_lshl_b32 s{{[0-9]}}, s{{[0-9]}}, 1
|
||||
define void @test_mul2(i32 %p) {
|
||||
%i = mul i32 %p, 2
|
||||
store volatile i32 %i, i32 addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
|
@ -1,14 +0,0 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; GCN-LABEL: {{^}}main:
|
||||
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3f4353f8, v{{[0-9]+}}
|
||||
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0xbf4353f8, v{{[0-9]+}}
|
||||
define amdgpu_vs void @main(float) {
|
||||
main_body:
|
||||
%1 = fmul float %0, 0x3FE86A7F00000000
|
||||
%2 = fmul float %0, 0xBFE86A7F00000000
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %1, float %1, float %2, float %2)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
@ -1,11 +1,11 @@
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; This shader has the potential to generate illegal VGPR to SGPR copies if
|
||||
; the wrong register class is used for the REG_SEQUENCE instructions.
|
||||
|
||||
; CHECK: {{^}}main:
|
||||
; CHECK: image_sample_b v{{\[[0-9]:[0-9]\]}}, v{{\[[0-9]:[0-9]\]}}, s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf
|
||||
; GCN-LABEL: {{^}}main:
|
||||
; GCN: image_sample_b v{{\[[0-9]:[0-9]\]}}, v{{\[[0-9]:[0-9]\]}}, s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf
|
||||
define amdgpu_ps void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
main_body:
|
||||
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
|
||||
@ -40,26 +40,16 @@ main_body:
|
||||
%tmp37 = extractelement <4 x float> %tmp35, i32 1
|
||||
%tmp38 = extractelement <4 x float> %tmp35, i32 2
|
||||
%tmp39 = extractelement <4 x float> %tmp35, i32 3
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %tmp36, float %tmp37, float %tmp38, float %tmp39)
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp36, float %tmp37, float %tmp38, float %tmp39, i1 true, i1 true) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1
|
||||
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
|
||||
declare <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
@ -3,7 +3,7 @@
|
||||
; The only way the subtarget knows that the si machine scheduler is being used
|
||||
; is to specify -mattr=si-scheduler. If we just pass --misched=si, the backend
|
||||
; won't know what scheduler we are using.
|
||||
; RUN: llc -march=amdgcn -mcpu=SI --misched=si -mattr=si-scheduler < %s | FileCheck %s
|
||||
; RUN: llc -march=amdgcn --misched=si -mattr=si-scheduler < %s | FileCheck %s
|
||||
|
||||
; The test checks the "si" machine scheduler pass works correctly.
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
; CHECK: s_waitcnt vmcnt(0)
|
||||
; CHECK: exp
|
||||
; CHECK: s_endpgm
|
||||
define amdgpu_ps void @main([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) {
|
||||
define amdgpu_ps void @main([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
|
||||
main_body:
|
||||
%tmp = bitcast [34 x <8 x i32>] addrspace(2)* %arg3 to <32 x i8> addrspace(2)*
|
||||
%tmp22 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp, align 32, !tbaa !0
|
||||
@ -46,29 +46,22 @@ main_body:
|
||||
%tmp34 = extractelement <4 x float> %tmp31, i32 2
|
||||
%tmp35 = extractelement <4 x float> %tmp31, i32 3
|
||||
%tmp36 = call i32 @llvm.SI.packf16(float %tmp32, float %tmp33)
|
||||
%tmp37 = bitcast i32 %tmp36 to float
|
||||
%tmp37 = bitcast i32 %tmp36 to <2 x half>
|
||||
%tmp38 = call i32 @llvm.SI.packf16(float %tmp34, float %tmp35)
|
||||
%tmp39 = bitcast i32 %tmp38 to float
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp37, float %tmp39, float %tmp37, float %tmp39)
|
||||
%tmp39 = bitcast i32 %tmp38 to <2 x half>
|
||||
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp37, <2 x half> %tmp39, i1 true, i1 false) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
|
||||
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
|
||||
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
|
||||
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.SI.packf16(float, float) #0
|
||||
declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
|
||||
declare i32 @llvm.SI.packf16(float, float) #1
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { nounwind }
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
||||
!0 = !{!1, !1, i64 0, i32 1}
|
||||
!1 = !{!"const", !2}
|
||||
|
@ -732,10 +732,10 @@ IF67: ; preds = %LOOP65
|
||||
%tmp579 = fmul float %tmp574, %tmp45
|
||||
%tmp580 = fadd float %tmp579, %tmp556
|
||||
%tmp581 = call i32 @llvm.SI.packf16(float %tmp576, float %tmp578)
|
||||
%tmp582 = bitcast i32 %tmp581 to float
|
||||
%tmp582 = bitcast i32 %tmp581 to <2 x half>
|
||||
%tmp583 = call i32 @llvm.SI.packf16(float %tmp580, float %tmp282)
|
||||
%tmp584 = bitcast i32 %tmp583 to float
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp582, float %tmp584, float %tmp582, float %tmp584)
|
||||
%tmp584 = bitcast i32 %tmp583 to <2 x half>
|
||||
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp582, <2 x half> %tmp584, i1 true, i1 true) #0
|
||||
ret void
|
||||
|
||||
ENDIF66: ; preds = %LOOP65
|
||||
@ -1814,10 +1814,10 @@ ENDIF209: ; preds = %ELSE214, %ELSE211,
|
||||
%max.0.i1 = call float @llvm.maxnum.f32(float %tmp774, float 0.000000e+00)
|
||||
%clamp.i2 = call float @llvm.minnum.f32(float %max.0.i1, float 1.000000e+00)
|
||||
%tmp776 = call i32 @llvm.SI.packf16(float %tmp768, float %tmp770)
|
||||
%tmp777 = bitcast i32 %tmp776 to float
|
||||
%tmp777 = bitcast i32 %tmp776 to <2 x half>
|
||||
%tmp778 = call i32 @llvm.SI.packf16(float %tmp772, float %clamp.i2)
|
||||
%tmp779 = bitcast i32 %tmp778 to float
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp777, float %tmp779, float %tmp777, float %tmp779)
|
||||
%tmp779 = bitcast i32 %tmp778 to <2 x half>
|
||||
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp777, <2 x half> %tmp779, i1 true, i1 true) #0
|
||||
ret void
|
||||
|
||||
ELSE214: ; preds = %ELSE211
|
||||
@ -1835,11 +1835,11 @@ ELSE214: ; preds = %ELSE211
|
||||
|
||||
declare float @llvm.exp2.f32(float) #1
|
||||
declare float @llvm.ceil.f32(float) #1
|
||||
declare float @llvm.amdgcn.rsq.f32(float) #1
|
||||
declare float @llvm.fabs.f32(float) #1
|
||||
declare float @llvm.pow.f32(float, float) #1
|
||||
declare float @llvm.minnum.f32(float, float) #1
|
||||
declare float @llvm.maxnum.f32(float, float) #1
|
||||
declare float @llvm.amdgcn.rsq.f32(float) #1
|
||||
declare float @llvm.amdgcn.cubeid(float, float, float) #1
|
||||
declare float @llvm.amdgcn.cubesc(float, float, float) #1
|
||||
declare float @llvm.amdgcn.cubetc(float, float, float) #1
|
||||
@ -1848,13 +1848,14 @@ declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
|
||||
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
|
||||
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
|
||||
declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1
|
||||
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
|
||||
|
||||
declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
|
||||
declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
|
||||
declare <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
|
||||
declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
|
||||
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
|
||||
declare i32 @llvm.SI.packf16(float, float) #1
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
@ -6,270 +6,271 @@
|
||||
|
||||
; SI: s_or_b64 exec, exec, [[SAVED:s\[[0-9]+:[0-9]+\]|[a-z]+]]
|
||||
; SI-NOT: v_readlane_b32 [[SAVED]]
|
||||
|
||||
define amdgpu_ps void @main() #0 {
|
||||
main_body:
|
||||
%0 = call float @llvm.SI.load.const(<16 x i8> undef, i32 16)
|
||||
%1 = call float @llvm.SI.load.const(<16 x i8> undef, i32 32)
|
||||
%2 = call float @llvm.SI.load.const(<16 x i8> undef, i32 80)
|
||||
%3 = call float @llvm.SI.load.const(<16 x i8> undef, i32 84)
|
||||
%4 = call float @llvm.SI.load.const(<16 x i8> undef, i32 88)
|
||||
%5 = call float @llvm.SI.load.const(<16 x i8> undef, i32 96)
|
||||
%6 = call float @llvm.SI.load.const(<16 x i8> undef, i32 100)
|
||||
%7 = call float @llvm.SI.load.const(<16 x i8> undef, i32 104)
|
||||
%8 = call float @llvm.SI.load.const(<16 x i8> undef, i32 112)
|
||||
%9 = call float @llvm.SI.load.const(<16 x i8> undef, i32 116)
|
||||
%10 = call float @llvm.SI.load.const(<16 x i8> undef, i32 120)
|
||||
%11 = call float @llvm.SI.load.const(<16 x i8> undef, i32 128)
|
||||
%12 = call float @llvm.SI.load.const(<16 x i8> undef, i32 132)
|
||||
%13 = call float @llvm.SI.load.const(<16 x i8> undef, i32 136)
|
||||
%14 = call float @llvm.SI.load.const(<16 x i8> undef, i32 144)
|
||||
%15 = call float @llvm.SI.load.const(<16 x i8> undef, i32 148)
|
||||
%16 = call float @llvm.SI.load.const(<16 x i8> undef, i32 152)
|
||||
%17 = call float @llvm.SI.load.const(<16 x i8> undef, i32 160)
|
||||
%18 = call float @llvm.SI.load.const(<16 x i8> undef, i32 164)
|
||||
%19 = call float @llvm.SI.load.const(<16 x i8> undef, i32 168)
|
||||
%20 = call float @llvm.SI.load.const(<16 x i8> undef, i32 176)
|
||||
%21 = call float @llvm.SI.load.const(<16 x i8> undef, i32 180)
|
||||
%22 = call float @llvm.SI.load.const(<16 x i8> undef, i32 184)
|
||||
%23 = call float @llvm.SI.load.const(<16 x i8> undef, i32 192)
|
||||
%24 = call float @llvm.SI.load.const(<16 x i8> undef, i32 196)
|
||||
%25 = call float @llvm.SI.load.const(<16 x i8> undef, i32 200)
|
||||
%26 = call float @llvm.SI.load.const(<16 x i8> undef, i32 208)
|
||||
%27 = call float @llvm.SI.load.const(<16 x i8> undef, i32 212)
|
||||
%28 = call float @llvm.SI.load.const(<16 x i8> undef, i32 216)
|
||||
%29 = call float @llvm.SI.load.const(<16 x i8> undef, i32 224)
|
||||
%30 = call float @llvm.SI.load.const(<16 x i8> undef, i32 228)
|
||||
%31 = call float @llvm.SI.load.const(<16 x i8> undef, i32 232)
|
||||
%32 = call float @llvm.SI.load.const(<16 x i8> undef, i32 240)
|
||||
%33 = call float @llvm.SI.load.const(<16 x i8> undef, i32 244)
|
||||
%34 = call float @llvm.SI.load.const(<16 x i8> undef, i32 248)
|
||||
%35 = call float @llvm.SI.load.const(<16 x i8> undef, i32 256)
|
||||
%36 = call float @llvm.SI.load.const(<16 x i8> undef, i32 260)
|
||||
%37 = call float @llvm.SI.load.const(<16 x i8> undef, i32 264)
|
||||
%38 = call float @llvm.SI.load.const(<16 x i8> undef, i32 272)
|
||||
%39 = call float @llvm.SI.load.const(<16 x i8> undef, i32 276)
|
||||
%40 = call float @llvm.SI.load.const(<16 x i8> undef, i32 280)
|
||||
%41 = call float @llvm.SI.load.const(<16 x i8> undef, i32 288)
|
||||
%42 = call float @llvm.SI.load.const(<16 x i8> undef, i32 292)
|
||||
%43 = call float @llvm.SI.load.const(<16 x i8> undef, i32 296)
|
||||
%44 = call float @llvm.SI.load.const(<16 x i8> undef, i32 304)
|
||||
%45 = call float @llvm.SI.load.const(<16 x i8> undef, i32 308)
|
||||
%46 = call float @llvm.SI.load.const(<16 x i8> undef, i32 312)
|
||||
%47 = call float @llvm.SI.load.const(<16 x i8> undef, i32 320)
|
||||
%48 = call float @llvm.SI.load.const(<16 x i8> undef, i32 324)
|
||||
%49 = call float @llvm.SI.load.const(<16 x i8> undef, i32 328)
|
||||
%50 = call float @llvm.SI.load.const(<16 x i8> undef, i32 336)
|
||||
%51 = call float @llvm.SI.load.const(<16 x i8> undef, i32 340)
|
||||
%52 = call float @llvm.SI.load.const(<16 x i8> undef, i32 344)
|
||||
%53 = call float @llvm.SI.load.const(<16 x i8> undef, i32 352)
|
||||
%54 = call float @llvm.SI.load.const(<16 x i8> undef, i32 356)
|
||||
%55 = call float @llvm.SI.load.const(<16 x i8> undef, i32 360)
|
||||
%56 = call float @llvm.SI.load.const(<16 x i8> undef, i32 368)
|
||||
%57 = call float @llvm.SI.load.const(<16 x i8> undef, i32 372)
|
||||
%58 = call float @llvm.SI.load.const(<16 x i8> undef, i32 376)
|
||||
%59 = call float @llvm.SI.load.const(<16 x i8> undef, i32 384)
|
||||
%60 = call float @llvm.SI.load.const(<16 x i8> undef, i32 388)
|
||||
%61 = call float @llvm.SI.load.const(<16 x i8> undef, i32 392)
|
||||
%62 = call float @llvm.SI.load.const(<16 x i8> undef, i32 400)
|
||||
%63 = call float @llvm.SI.load.const(<16 x i8> undef, i32 404)
|
||||
%64 = call float @llvm.SI.load.const(<16 x i8> undef, i32 408)
|
||||
%65 = call float @llvm.SI.load.const(<16 x i8> undef, i32 416)
|
||||
%66 = call float @llvm.SI.load.const(<16 x i8> undef, i32 420)
|
||||
%tmp = call float @llvm.SI.load.const(<16 x i8> undef, i32 16)
|
||||
%tmp1 = call float @llvm.SI.load.const(<16 x i8> undef, i32 32)
|
||||
%tmp2 = call float @llvm.SI.load.const(<16 x i8> undef, i32 80)
|
||||
%tmp3 = call float @llvm.SI.load.const(<16 x i8> undef, i32 84)
|
||||
%tmp4 = call float @llvm.SI.load.const(<16 x i8> undef, i32 88)
|
||||
%tmp5 = call float @llvm.SI.load.const(<16 x i8> undef, i32 96)
|
||||
%tmp6 = call float @llvm.SI.load.const(<16 x i8> undef, i32 100)
|
||||
%tmp7 = call float @llvm.SI.load.const(<16 x i8> undef, i32 104)
|
||||
%tmp8 = call float @llvm.SI.load.const(<16 x i8> undef, i32 112)
|
||||
%tmp9 = call float @llvm.SI.load.const(<16 x i8> undef, i32 116)
|
||||
%tmp10 = call float @llvm.SI.load.const(<16 x i8> undef, i32 120)
|
||||
%tmp11 = call float @llvm.SI.load.const(<16 x i8> undef, i32 128)
|
||||
%tmp12 = call float @llvm.SI.load.const(<16 x i8> undef, i32 132)
|
||||
%tmp13 = call float @llvm.SI.load.const(<16 x i8> undef, i32 136)
|
||||
%tmp14 = call float @llvm.SI.load.const(<16 x i8> undef, i32 144)
|
||||
%tmp15 = call float @llvm.SI.load.const(<16 x i8> undef, i32 148)
|
||||
%tmp16 = call float @llvm.SI.load.const(<16 x i8> undef, i32 152)
|
||||
%tmp17 = call float @llvm.SI.load.const(<16 x i8> undef, i32 160)
|
||||
%tmp18 = call float @llvm.SI.load.const(<16 x i8> undef, i32 164)
|
||||
%tmp19 = call float @llvm.SI.load.const(<16 x i8> undef, i32 168)
|
||||
%tmp20 = call float @llvm.SI.load.const(<16 x i8> undef, i32 176)
|
||||
%tmp21 = call float @llvm.SI.load.const(<16 x i8> undef, i32 180)
|
||||
%tmp22 = call float @llvm.SI.load.const(<16 x i8> undef, i32 184)
|
||||
%tmp23 = call float @llvm.SI.load.const(<16 x i8> undef, i32 192)
|
||||
%tmp24 = call float @llvm.SI.load.const(<16 x i8> undef, i32 196)
|
||||
%tmp25 = call float @llvm.SI.load.const(<16 x i8> undef, i32 200)
|
||||
%tmp26 = call float @llvm.SI.load.const(<16 x i8> undef, i32 208)
|
||||
%tmp27 = call float @llvm.SI.load.const(<16 x i8> undef, i32 212)
|
||||
%tmp28 = call float @llvm.SI.load.const(<16 x i8> undef, i32 216)
|
||||
%tmp29 = call float @llvm.SI.load.const(<16 x i8> undef, i32 224)
|
||||
%tmp30 = call float @llvm.SI.load.const(<16 x i8> undef, i32 228)
|
||||
%tmp31 = call float @llvm.SI.load.const(<16 x i8> undef, i32 232)
|
||||
%tmp32 = call float @llvm.SI.load.const(<16 x i8> undef, i32 240)
|
||||
%tmp33 = call float @llvm.SI.load.const(<16 x i8> undef, i32 244)
|
||||
%tmp34 = call float @llvm.SI.load.const(<16 x i8> undef, i32 248)
|
||||
%tmp35 = call float @llvm.SI.load.const(<16 x i8> undef, i32 256)
|
||||
%tmp36 = call float @llvm.SI.load.const(<16 x i8> undef, i32 260)
|
||||
%tmp37 = call float @llvm.SI.load.const(<16 x i8> undef, i32 264)
|
||||
%tmp38 = call float @llvm.SI.load.const(<16 x i8> undef, i32 272)
|
||||
%tmp39 = call float @llvm.SI.load.const(<16 x i8> undef, i32 276)
|
||||
%tmp40 = call float @llvm.SI.load.const(<16 x i8> undef, i32 280)
|
||||
%tmp41 = call float @llvm.SI.load.const(<16 x i8> undef, i32 288)
|
||||
%tmp42 = call float @llvm.SI.load.const(<16 x i8> undef, i32 292)
|
||||
%tmp43 = call float @llvm.SI.load.const(<16 x i8> undef, i32 296)
|
||||
%tmp44 = call float @llvm.SI.load.const(<16 x i8> undef, i32 304)
|
||||
%tmp45 = call float @llvm.SI.load.const(<16 x i8> undef, i32 308)
|
||||
%tmp46 = call float @llvm.SI.load.const(<16 x i8> undef, i32 312)
|
||||
%tmp47 = call float @llvm.SI.load.const(<16 x i8> undef, i32 320)
|
||||
%tmp48 = call float @llvm.SI.load.const(<16 x i8> undef, i32 324)
|
||||
%tmp49 = call float @llvm.SI.load.const(<16 x i8> undef, i32 328)
|
||||
%tmp50 = call float @llvm.SI.load.const(<16 x i8> undef, i32 336)
|
||||
%tmp51 = call float @llvm.SI.load.const(<16 x i8> undef, i32 340)
|
||||
%tmp52 = call float @llvm.SI.load.const(<16 x i8> undef, i32 344)
|
||||
%tmp53 = call float @llvm.SI.load.const(<16 x i8> undef, i32 352)
|
||||
%tmp54 = call float @llvm.SI.load.const(<16 x i8> undef, i32 356)
|
||||
%tmp55 = call float @llvm.SI.load.const(<16 x i8> undef, i32 360)
|
||||
%tmp56 = call float @llvm.SI.load.const(<16 x i8> undef, i32 368)
|
||||
%tmp57 = call float @llvm.SI.load.const(<16 x i8> undef, i32 372)
|
||||
%tmp58 = call float @llvm.SI.load.const(<16 x i8> undef, i32 376)
|
||||
%tmp59 = call float @llvm.SI.load.const(<16 x i8> undef, i32 384)
|
||||
%tmp60 = call float @llvm.SI.load.const(<16 x i8> undef, i32 388)
|
||||
%tmp61 = call float @llvm.SI.load.const(<16 x i8> undef, i32 392)
|
||||
%tmp62 = call float @llvm.SI.load.const(<16 x i8> undef, i32 400)
|
||||
%tmp63 = call float @llvm.SI.load.const(<16 x i8> undef, i32 404)
|
||||
%tmp64 = call float @llvm.SI.load.const(<16 x i8> undef, i32 408)
|
||||
%tmp65 = call float @llvm.SI.load.const(<16 x i8> undef, i32 416)
|
||||
%tmp66 = call float @llvm.SI.load.const(<16 x i8> undef, i32 420)
|
||||
br label %LOOP
|
||||
|
||||
LOOP: ; preds = %ENDIF2795, %main_body
|
||||
%temp894.0 = phi float [ 0.000000e+00, %main_body ], [ %temp894.1, %ENDIF2795 ]
|
||||
%temp18.0 = phi float [ undef, %main_body ], [ %temp18.1, %ENDIF2795 ]
|
||||
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
|
||||
%67 = icmp sgt i32 %tid, 4
|
||||
br i1 %67, label %ENDLOOP, label %ENDIF
|
||||
%tmp67 = icmp sgt i32 %tid, 4
|
||||
br i1 %tmp67, label %ENDLOOP, label %ENDIF
|
||||
|
||||
ENDLOOP: ; preds = %ELSE2566, %LOOP
|
||||
%one.sub.a.i = fsub float 1.000000e+00, %0
|
||||
%one.sub.a.i = fsub float 1.000000e+00, %tmp
|
||||
%one.sub.ac.i = fmul float %one.sub.a.i, undef
|
||||
%result.i = fadd float fmul (float undef, float undef), %one.sub.ac.i
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float undef, float %result.i, float undef, float 1.000000e+00)
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float undef, float %result.i, float undef, float 1.000000e+00, i1 true, i1 true) #0
|
||||
ret void
|
||||
|
||||
ENDIF: ; preds = %LOOP
|
||||
%68 = fsub float %2, undef
|
||||
%69 = fsub float %3, undef
|
||||
%70 = fsub float %4, undef
|
||||
%71 = fmul float %68, 0.000000e+00
|
||||
%72 = fmul float %69, undef
|
||||
%73 = fmul float %70, undef
|
||||
%74 = fsub float %6, undef
|
||||
%75 = fsub float %7, undef
|
||||
%76 = fmul float %74, undef
|
||||
%77 = fmul float %75, 0.000000e+00
|
||||
%78 = call float @llvm.minnum.f32(float %73, float %77)
|
||||
%79 = call float @llvm.maxnum.f32(float %71, float 0.000000e+00)
|
||||
%80 = call float @llvm.maxnum.f32(float %72, float %76)
|
||||
%81 = call float @llvm.maxnum.f32(float undef, float %78)
|
||||
%82 = call float @llvm.minnum.f32(float %79, float %80)
|
||||
%83 = call float @llvm.minnum.f32(float %82, float undef)
|
||||
%84 = fsub float %14, undef
|
||||
%85 = fsub float %15, undef
|
||||
%86 = fsub float %16, undef
|
||||
%87 = fmul float %84, undef
|
||||
%88 = fmul float %85, undef
|
||||
%89 = fmul float %86, undef
|
||||
%90 = fsub float %17, undef
|
||||
%91 = fsub float %18, undef
|
||||
%92 = fsub float %19, undef
|
||||
%93 = fmul float %90, 0.000000e+00
|
||||
%94 = fmul float %91, undef
|
||||
%95 = fmul float %92, undef
|
||||
%96 = call float @llvm.minnum.f32(float %88, float %94)
|
||||
%97 = call float @llvm.maxnum.f32(float %87, float %93)
|
||||
%98 = call float @llvm.maxnum.f32(float %89, float %95)
|
||||
%99 = call float @llvm.maxnum.f32(float undef, float %96)
|
||||
%100 = call float @llvm.maxnum.f32(float %99, float undef)
|
||||
%101 = call float @llvm.minnum.f32(float %97, float undef)
|
||||
%102 = call float @llvm.minnum.f32(float %101, float %98)
|
||||
%103 = fsub float %30, undef
|
||||
%104 = fsub float %31, undef
|
||||
%105 = fmul float %103, 0.000000e+00
|
||||
%106 = fmul float %104, 0.000000e+00
|
||||
%107 = call float @llvm.minnum.f32(float undef, float %105)
|
||||
%108 = call float @llvm.maxnum.f32(float undef, float %106)
|
||||
%109 = call float @llvm.maxnum.f32(float undef, float %107)
|
||||
%110 = call float @llvm.maxnum.f32(float %109, float undef)
|
||||
%111 = call float @llvm.minnum.f32(float undef, float %108)
|
||||
%112 = fsub float %32, undef
|
||||
%113 = fsub float %33, undef
|
||||
%114 = fsub float %34, undef
|
||||
%115 = fmul float %112, 0.000000e+00
|
||||
%116 = fmul float %113, undef
|
||||
%117 = fmul float %114, undef
|
||||
%118 = fsub float %35, undef
|
||||
%119 = fsub float %36, undef
|
||||
%120 = fsub float %37, undef
|
||||
%121 = fmul float %118, undef
|
||||
%122 = fmul float %119, undef
|
||||
%123 = fmul float %120, undef
|
||||
%124 = call float @llvm.minnum.f32(float %115, float %121)
|
||||
%125 = call float @llvm.minnum.f32(float %116, float %122)
|
||||
%126 = call float @llvm.minnum.f32(float %117, float %123)
|
||||
%127 = call float @llvm.maxnum.f32(float %124, float %125)
|
||||
%128 = call float @llvm.maxnum.f32(float %127, float %126)
|
||||
%129 = fsub float %38, undef
|
||||
%130 = fsub float %39, undef
|
||||
%131 = fsub float %40, undef
|
||||
%132 = fmul float %129, 0.000000e+00
|
||||
%133 = fmul float %130, undef
|
||||
%134 = fmul float %131, undef
|
||||
%135 = fsub float %41, undef
|
||||
%136 = fsub float %42, undef
|
||||
%137 = fsub float %43, undef
|
||||
%138 = fmul float %135, undef
|
||||
%139 = fmul float %136, undef
|
||||
%140 = fmul float %137, undef
|
||||
%141 = call float @llvm.minnum.f32(float %132, float %138)
|
||||
%142 = call float @llvm.minnum.f32(float %133, float %139)
|
||||
%143 = call float @llvm.minnum.f32(float %134, float %140)
|
||||
%144 = call float @llvm.maxnum.f32(float %141, float %142)
|
||||
%145 = call float @llvm.maxnum.f32(float %144, float %143)
|
||||
%146 = fsub float %44, undef
|
||||
%147 = fsub float %45, undef
|
||||
%148 = fsub float %46, undef
|
||||
%149 = fmul float %146, 0.000000e+00
|
||||
%150 = fmul float %147, 0.000000e+00
|
||||
%151 = fmul float %148, undef
|
||||
%152 = fsub float %47, undef
|
||||
%153 = fsub float %48, undef
|
||||
%154 = fsub float %49, undef
|
||||
%155 = fmul float %152, undef
|
||||
%156 = fmul float %153, 0.000000e+00
|
||||
%157 = fmul float %154, undef
|
||||
%158 = call float @llvm.minnum.f32(float %149, float %155)
|
||||
%159 = call float @llvm.minnum.f32(float %150, float %156)
|
||||
%160 = call float @llvm.minnum.f32(float %151, float %157)
|
||||
%161 = call float @llvm.maxnum.f32(float %158, float %159)
|
||||
%162 = call float @llvm.maxnum.f32(float %161, float %160)
|
||||
%163 = fsub float %50, undef
|
||||
%164 = fsub float %51, undef
|
||||
%165 = fsub float %52, undef
|
||||
%166 = fmul float %163, undef
|
||||
%167 = fmul float %164, 0.000000e+00
|
||||
%168 = fmul float %165, 0.000000e+00
|
||||
%169 = fsub float %53, undef
|
||||
%170 = fsub float %54, undef
|
||||
%171 = fsub float %55, undef
|
||||
%172 = fdiv float 1.000000e+00, %temp18.0
|
||||
%173 = fmul float %169, undef
|
||||
%174 = fmul float %170, undef
|
||||
%175 = fmul float %171, %172
|
||||
%176 = call float @llvm.minnum.f32(float %166, float %173)
|
||||
%177 = call float @llvm.minnum.f32(float %167, float %174)
|
||||
%178 = call float @llvm.minnum.f32(float %168, float %175)
|
||||
%179 = call float @llvm.maxnum.f32(float %176, float %177)
|
||||
%180 = call float @llvm.maxnum.f32(float %179, float %178)
|
||||
%181 = fsub float %62, undef
|
||||
%182 = fsub float %63, undef
|
||||
%183 = fsub float %64, undef
|
||||
%184 = fmul float %181, 0.000000e+00
|
||||
%185 = fmul float %182, undef
|
||||
%186 = fmul float %183, undef
|
||||
%187 = fsub float %65, undef
|
||||
%188 = fsub float %66, undef
|
||||
%189 = fmul float %187, undef
|
||||
%190 = fmul float %188, undef
|
||||
%191 = call float @llvm.maxnum.f32(float %184, float %189)
|
||||
%192 = call float @llvm.maxnum.f32(float %185, float %190)
|
||||
%193 = call float @llvm.maxnum.f32(float %186, float undef)
|
||||
%194 = call float @llvm.minnum.f32(float %191, float %192)
|
||||
%195 = call float @llvm.minnum.f32(float %194, float %193)
|
||||
%.temp292.7 = select i1 undef, float %162, float undef
|
||||
%temp292.9 = select i1 false, float %180, float %.temp292.7
|
||||
%tmp68 = fsub float %tmp2, undef
|
||||
%tmp69 = fsub float %tmp3, undef
|
||||
%tmp70 = fsub float %tmp4, undef
|
||||
%tmp71 = fmul float %tmp68, 0.000000e+00
|
||||
%tmp72 = fmul float %tmp69, undef
|
||||
%tmp73 = fmul float %tmp70, undef
|
||||
%tmp74 = fsub float %tmp6, undef
|
||||
%tmp75 = fsub float %tmp7, undef
|
||||
%tmp76 = fmul float %tmp74, undef
|
||||
%tmp77 = fmul float %tmp75, 0.000000e+00
|
||||
%tmp78 = call float @llvm.minnum.f32(float %tmp73, float %tmp77)
|
||||
%tmp79 = call float @llvm.maxnum.f32(float %tmp71, float 0.000000e+00)
|
||||
%tmp80 = call float @llvm.maxnum.f32(float %tmp72, float %tmp76)
|
||||
%tmp81 = call float @llvm.maxnum.f32(float undef, float %tmp78)
|
||||
%tmp82 = call float @llvm.minnum.f32(float %tmp79, float %tmp80)
|
||||
%tmp83 = call float @llvm.minnum.f32(float %tmp82, float undef)
|
||||
%tmp84 = fsub float %tmp14, undef
|
||||
%tmp85 = fsub float %tmp15, undef
|
||||
%tmp86 = fsub float %tmp16, undef
|
||||
%tmp87 = fmul float %tmp84, undef
|
||||
%tmp88 = fmul float %tmp85, undef
|
||||
%tmp89 = fmul float %tmp86, undef
|
||||
%tmp90 = fsub float %tmp17, undef
|
||||
%tmp91 = fsub float %tmp18, undef
|
||||
%tmp92 = fsub float %tmp19, undef
|
||||
%tmp93 = fmul float %tmp90, 0.000000e+00
|
||||
%tmp94 = fmul float %tmp91, undef
|
||||
%tmp95 = fmul float %tmp92, undef
|
||||
%tmp96 = call float @llvm.minnum.f32(float %tmp88, float %tmp94)
|
||||
%tmp97 = call float @llvm.maxnum.f32(float %tmp87, float %tmp93)
|
||||
%tmp98 = call float @llvm.maxnum.f32(float %tmp89, float %tmp95)
|
||||
%tmp99 = call float @llvm.maxnum.f32(float undef, float %tmp96)
|
||||
%tmp100 = call float @llvm.maxnum.f32(float %tmp99, float undef)
|
||||
%tmp101 = call float @llvm.minnum.f32(float %tmp97, float undef)
|
||||
%tmp102 = call float @llvm.minnum.f32(float %tmp101, float %tmp98)
|
||||
%tmp103 = fsub float %tmp30, undef
|
||||
%tmp104 = fsub float %tmp31, undef
|
||||
%tmp105 = fmul float %tmp103, 0.000000e+00
|
||||
%tmp106 = fmul float %tmp104, 0.000000e+00
|
||||
%tmp107 = call float @llvm.minnum.f32(float undef, float %tmp105)
|
||||
%tmp108 = call float @llvm.maxnum.f32(float undef, float %tmp106)
|
||||
%tmp109 = call float @llvm.maxnum.f32(float undef, float %tmp107)
|
||||
%tmp110 = call float @llvm.maxnum.f32(float %tmp109, float undef)
|
||||
%tmp111 = call float @llvm.minnum.f32(float undef, float %tmp108)
|
||||
%tmp112 = fsub float %tmp32, undef
|
||||
%tmp113 = fsub float %tmp33, undef
|
||||
%tmp114 = fsub float %tmp34, undef
|
||||
%tmp115 = fmul float %tmp112, 0.000000e+00
|
||||
%tmp116 = fmul float %tmp113, undef
|
||||
%tmp117 = fmul float %tmp114, undef
|
||||
%tmp118 = fsub float %tmp35, undef
|
||||
%tmp119 = fsub float %tmp36, undef
|
||||
%tmp120 = fsub float %tmp37, undef
|
||||
%tmp121 = fmul float %tmp118, undef
|
||||
%tmp122 = fmul float %tmp119, undef
|
||||
%tmp123 = fmul float %tmp120, undef
|
||||
%tmp124 = call float @llvm.minnum.f32(float %tmp115, float %tmp121)
|
||||
%tmp125 = call float @llvm.minnum.f32(float %tmp116, float %tmp122)
|
||||
%tmp126 = call float @llvm.minnum.f32(float %tmp117, float %tmp123)
|
||||
%tmp127 = call float @llvm.maxnum.f32(float %tmp124, float %tmp125)
|
||||
%tmp128 = call float @llvm.maxnum.f32(float %tmp127, float %tmp126)
|
||||
%tmp129 = fsub float %tmp38, undef
|
||||
%tmp130 = fsub float %tmp39, undef
|
||||
%tmp131 = fsub float %tmp40, undef
|
||||
%tmp132 = fmul float %tmp129, 0.000000e+00
|
||||
%tmp133 = fmul float %tmp130, undef
|
||||
%tmp134 = fmul float %tmp131, undef
|
||||
%tmp135 = fsub float %tmp41, undef
|
||||
%tmp136 = fsub float %tmp42, undef
|
||||
%tmp137 = fsub float %tmp43, undef
|
||||
%tmp138 = fmul float %tmp135, undef
|
||||
%tmp139 = fmul float %tmp136, undef
|
||||
%tmp140 = fmul float %tmp137, undef
|
||||
%tmp141 = call float @llvm.minnum.f32(float %tmp132, float %tmp138)
|
||||
%tmp142 = call float @llvm.minnum.f32(float %tmp133, float %tmp139)
|
||||
%tmp143 = call float @llvm.minnum.f32(float %tmp134, float %tmp140)
|
||||
%tmp144 = call float @llvm.maxnum.f32(float %tmp141, float %tmp142)
|
||||
%tmp145 = call float @llvm.maxnum.f32(float %tmp144, float %tmp143)
|
||||
%tmp146 = fsub float %tmp44, undef
|
||||
%tmp147 = fsub float %tmp45, undef
|
||||
%tmp148 = fsub float %tmp46, undef
|
||||
%tmp149 = fmul float %tmp146, 0.000000e+00
|
||||
%tmp150 = fmul float %tmp147, 0.000000e+00
|
||||
%tmp151 = fmul float %tmp148, undef
|
||||
%tmp152 = fsub float %tmp47, undef
|
||||
%tmp153 = fsub float %tmp48, undef
|
||||
%tmp154 = fsub float %tmp49, undef
|
||||
%tmp155 = fmul float %tmp152, undef
|
||||
%tmp156 = fmul float %tmp153, 0.000000e+00
|
||||
%tmp157 = fmul float %tmp154, undef
|
||||
%tmp158 = call float @llvm.minnum.f32(float %tmp149, float %tmp155)
|
||||
%tmp159 = call float @llvm.minnum.f32(float %tmp150, float %tmp156)
|
||||
%tmp160 = call float @llvm.minnum.f32(float %tmp151, float %tmp157)
|
||||
%tmp161 = call float @llvm.maxnum.f32(float %tmp158, float %tmp159)
|
||||
%tmp162 = call float @llvm.maxnum.f32(float %tmp161, float %tmp160)
|
||||
%tmp163 = fsub float %tmp50, undef
|
||||
%tmp164 = fsub float %tmp51, undef
|
||||
%tmp165 = fsub float %tmp52, undef
|
||||
%tmp166 = fmul float %tmp163, undef
|
||||
%tmp167 = fmul float %tmp164, 0.000000e+00
|
||||
%tmp168 = fmul float %tmp165, 0.000000e+00
|
||||
%tmp169 = fsub float %tmp53, undef
|
||||
%tmp170 = fsub float %tmp54, undef
|
||||
%tmp171 = fsub float %tmp55, undef
|
||||
%tmp172 = fdiv float 1.000000e+00, %temp18.0
|
||||
%tmp173 = fmul float %tmp169, undef
|
||||
%tmp174 = fmul float %tmp170, undef
|
||||
%tmp175 = fmul float %tmp171, %tmp172
|
||||
%tmp176 = call float @llvm.minnum.f32(float %tmp166, float %tmp173)
|
||||
%tmp177 = call float @llvm.minnum.f32(float %tmp167, float %tmp174)
|
||||
%tmp178 = call float @llvm.minnum.f32(float %tmp168, float %tmp175)
|
||||
%tmp179 = call float @llvm.maxnum.f32(float %tmp176, float %tmp177)
|
||||
%tmp180 = call float @llvm.maxnum.f32(float %tmp179, float %tmp178)
|
||||
%tmp181 = fsub float %tmp62, undef
|
||||
%tmp182 = fsub float %tmp63, undef
|
||||
%tmp183 = fsub float %tmp64, undef
|
||||
%tmp184 = fmul float %tmp181, 0.000000e+00
|
||||
%tmp185 = fmul float %tmp182, undef
|
||||
%tmp186 = fmul float %tmp183, undef
|
||||
%tmp187 = fsub float %tmp65, undef
|
||||
%tmp188 = fsub float %tmp66, undef
|
||||
%tmp189 = fmul float %tmp187, undef
|
||||
%tmp190 = fmul float %tmp188, undef
|
||||
%tmp191 = call float @llvm.maxnum.f32(float %tmp184, float %tmp189)
|
||||
%tmp192 = call float @llvm.maxnum.f32(float %tmp185, float %tmp190)
|
||||
%tmp193 = call float @llvm.maxnum.f32(float %tmp186, float undef)
|
||||
%tmp194 = call float @llvm.minnum.f32(float %tmp191, float %tmp192)
|
||||
%tmp195 = call float @llvm.minnum.f32(float %tmp194, float %tmp193)
|
||||
%.temp292.7 = select i1 undef, float %tmp162, float undef
|
||||
%temp292.9 = select i1 false, float %tmp180, float %.temp292.7
|
||||
%.temp292.9 = select i1 undef, float undef, float %temp292.9
|
||||
%196 = fcmp ogt float undef, 0.000000e+00
|
||||
%197 = fcmp olt float undef, %195
|
||||
%198 = and i1 %196, %197
|
||||
%199 = fcmp olt float undef, %.temp292.9
|
||||
%200 = and i1 %198, %199
|
||||
%temp292.11 = select i1 %200, float undef, float %.temp292.9
|
||||
%tid0 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #2
|
||||
%tmp196 = fcmp ogt float undef, 0.000000e+00
|
||||
%tmp197 = fcmp olt float undef, %tmp195
|
||||
%tmp198 = and i1 %tmp196, %tmp197
|
||||
%tmp199 = fcmp olt float undef, %.temp292.9
|
||||
%tmp200 = and i1 %tmp198, %tmp199
|
||||
%temp292.11 = select i1 %tmp200, float undef, float %.temp292.9
|
||||
%tid0 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
|
||||
%cmp0 = icmp eq i32 %tid0, 0
|
||||
br i1 %cmp0, label %IF2565, label %ELSE2566
|
||||
|
||||
IF2565: ; preds = %ENDIF
|
||||
%tid1 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #2
|
||||
%tid1 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
|
||||
%cmp1 = icmp eq i32 %tid1, 0
|
||||
br i1 %cmp1, label %ENDIF2582, label %ELSE2584
|
||||
|
||||
ELSE2566: ; preds = %ENDIF
|
||||
%tid2 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #2
|
||||
%tid2 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
|
||||
%tidf = bitcast i32 %tid2 to float
|
||||
%201 = fcmp oeq float %temp292.11, %tidf
|
||||
br i1 %201, label %ENDLOOP, label %ELSE2593
|
||||
%tmp201 = fcmp oeq float %temp292.11, %tidf
|
||||
br i1 %tmp201, label %ENDLOOP, label %ELSE2593
|
||||
|
||||
ENDIF2564: ; preds = %ENDIF2594, %ENDIF2588
|
||||
%temp894.1 = phi float [ undef, %ENDIF2588 ], [ %temp894.2, %ENDIF2594 ]
|
||||
%temp18.1 = phi float [ %218, %ENDIF2588 ], [ undef, %ENDIF2594 ]
|
||||
%202 = fsub float %5, undef
|
||||
%203 = fmul float %202, undef
|
||||
%204 = call float @llvm.maxnum.f32(float undef, float %203)
|
||||
%205 = call float @llvm.minnum.f32(float %204, float undef)
|
||||
%206 = call float @llvm.minnum.f32(float %205, float undef)
|
||||
%207 = fcmp ogt float undef, 0.000000e+00
|
||||
%208 = fcmp olt float undef, 1.000000e+00
|
||||
%209 = and i1 %207, %208
|
||||
%tid3 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #2
|
||||
%temp18.1 = phi float [ %tmp218, %ENDIF2588 ], [ undef, %ENDIF2594 ]
|
||||
%tmp202 = fsub float %tmp5, undef
|
||||
%tmp203 = fmul float %tmp202, undef
|
||||
%tmp204 = call float @llvm.maxnum.f32(float undef, float %tmp203)
|
||||
%tmp205 = call float @llvm.minnum.f32(float %tmp204, float undef)
|
||||
%tmp206 = call float @llvm.minnum.f32(float %tmp205, float undef)
|
||||
%tmp207 = fcmp ogt float undef, 0.000000e+00
|
||||
%tmp208 = fcmp olt float undef, 1.000000e+00
|
||||
%tmp209 = and i1 %tmp207, %tmp208
|
||||
%tid3 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
|
||||
%tidf3 = bitcast i32 %tid3 to float
|
||||
%210 = fcmp olt float %tidf3, %206
|
||||
%211 = and i1 %209, %210
|
||||
br i1 %211, label %ENDIF2795, label %ELSE2797
|
||||
%tmp210 = fcmp olt float %tidf3, %tmp206
|
||||
%tmp211 = and i1 %tmp209, %tmp210
|
||||
br i1 %tmp211, label %ENDIF2795, label %ELSE2797
|
||||
|
||||
ELSE2584: ; preds = %IF2565
|
||||
br label %ENDIF2582
|
||||
|
||||
ENDIF2582: ; preds = %ELSE2584, %IF2565
|
||||
%212 = fadd float %1, undef
|
||||
%213 = fadd float 0.000000e+00, %212
|
||||
%floor = call float @llvm.floor.f32(float %213)
|
||||
%214 = fsub float %213, %floor
|
||||
%tid4 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #2
|
||||
%tmp212 = fadd float %tmp1, undef
|
||||
%tmp213 = fadd float 0.000000e+00, %tmp212
|
||||
%floor = call float @llvm.floor.f32(float %tmp213)
|
||||
%tmp214 = fsub float %tmp213, %floor
|
||||
%tid4 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
|
||||
%cmp4 = icmp eq i32 %tid4, 0
|
||||
br i1 %cmp4, label %IF2589, label %ELSE2590
|
||||
|
||||
@ -280,61 +281,61 @@ ELSE2590: ; preds = %ENDIF2582
|
||||
br label %ENDIF2588
|
||||
|
||||
ENDIF2588: ; preds = %ELSE2590, %IF2589
|
||||
%215 = fsub float 1.000000e+00, %214
|
||||
%216 = call float @llvm.sqrt.f32(float %215)
|
||||
%217 = fmul float %216, undef
|
||||
%218 = fadd float %217, undef
|
||||
%tmp215 = fsub float 1.000000e+00, %tmp214
|
||||
%tmp216 = call float @llvm.sqrt.f32(float %tmp215)
|
||||
%tmp217 = fmul float %tmp216, undef
|
||||
%tmp218 = fadd float %tmp217, undef
|
||||
br label %ENDIF2564
|
||||
|
||||
ELSE2593: ; preds = %ELSE2566
|
||||
%219 = fcmp oeq float %temp292.11, %81
|
||||
%220 = fcmp olt float %81, %83
|
||||
%221 = and i1 %219, %220
|
||||
br i1 %221, label %ENDIF2594, label %ELSE2596
|
||||
%tmp219 = fcmp oeq float %temp292.11, %tmp81
|
||||
%tmp220 = fcmp olt float %tmp81, %tmp83
|
||||
%tmp221 = and i1 %tmp219, %tmp220
|
||||
br i1 %tmp221, label %ENDIF2594, label %ELSE2596
|
||||
|
||||
ELSE2596: ; preds = %ELSE2593
|
||||
%222 = fcmp oeq float %temp292.11, %100
|
||||
%223 = fcmp olt float %100, %102
|
||||
%224 = and i1 %222, %223
|
||||
br i1 %224, label %ENDIF2594, label %ELSE2632
|
||||
%tmp222 = fcmp oeq float %temp292.11, %tmp100
|
||||
%tmp223 = fcmp olt float %tmp100, %tmp102
|
||||
%tmp224 = and i1 %tmp222, %tmp223
|
||||
br i1 %tmp224, label %ENDIF2594, label %ELSE2632
|
||||
|
||||
ENDIF2594: ; preds = %ELSE2788, %ELSE2785, %ELSE2782, %ELSE2779, %IF2775, %ELSE2761, %ELSE2758, %IF2757, %ELSE2704, %ELSE2686, %ELSE2671, %ELSE2668, %IF2667, %ELSE2632, %ELSE2596, %ELSE2593
|
||||
%temp894.2 = phi float [ 0.000000e+00, %IF2667 ], [ 0.000000e+00, %ELSE2671 ], [ 0.000000e+00, %IF2757 ], [ 0.000000e+00, %ELSE2761 ], [ %temp894.0, %ELSE2758 ], [ 0.000000e+00, %IF2775 ], [ 0.000000e+00, %ELSE2779 ], [ 0.000000e+00, %ELSE2782 ], [ %.2848, %ELSE2788 ], [ 0.000000e+00, %ELSE2785 ], [ 0.000000e+00, %ELSE2593 ], [ 0.000000e+00, %ELSE2632 ], [ 0.000000e+00, %ELSE2704 ], [ 0.000000e+00, %ELSE2686 ], [ 0.000000e+00, %ELSE2668 ], [ 0.000000e+00, %ELSE2596 ]
|
||||
%225 = fmul float %temp894.2, undef
|
||||
%tmp225 = fmul float %temp894.2, undef
|
||||
br label %ENDIF2564
|
||||
|
||||
ELSE2632: ; preds = %ELSE2596
|
||||
br i1 undef, label %ENDIF2594, label %ELSE2650
|
||||
|
||||
ELSE2650: ; preds = %ELSE2632
|
||||
%226 = fcmp oeq float %temp292.11, %110
|
||||
%227 = fcmp olt float %110, %111
|
||||
%228 = and i1 %226, %227
|
||||
br i1 %228, label %IF2667, label %ELSE2668
|
||||
%tmp226 = fcmp oeq float %temp292.11, %tmp110
|
||||
%tmp227 = fcmp olt float %tmp110, %tmp111
|
||||
%tmp228 = and i1 %tmp226, %tmp227
|
||||
br i1 %tmp228, label %IF2667, label %ELSE2668
|
||||
|
||||
IF2667: ; preds = %ELSE2650
|
||||
br i1 undef, label %ENDIF2594, label %ELSE2671
|
||||
|
||||
ELSE2668: ; preds = %ELSE2650
|
||||
%229 = fcmp oeq float %temp292.11, %128
|
||||
%230 = fcmp olt float %128, undef
|
||||
%231 = and i1 %229, %230
|
||||
br i1 %231, label %ENDIF2594, label %ELSE2686
|
||||
%tmp229 = fcmp oeq float %temp292.11, %tmp128
|
||||
%tmp230 = fcmp olt float %tmp128, undef
|
||||
%tmp231 = and i1 %tmp229, %tmp230
|
||||
br i1 %tmp231, label %ENDIF2594, label %ELSE2686
|
||||
|
||||
ELSE2671: ; preds = %IF2667
|
||||
br label %ENDIF2594
|
||||
|
||||
ELSE2686: ; preds = %ELSE2668
|
||||
%232 = fcmp oeq float %temp292.11, %145
|
||||
%233 = fcmp olt float %145, undef
|
||||
%234 = and i1 %232, %233
|
||||
br i1 %234, label %ENDIF2594, label %ELSE2704
|
||||
%tmp232 = fcmp oeq float %temp292.11, %tmp145
|
||||
%tmp233 = fcmp olt float %tmp145, undef
|
||||
%tmp234 = and i1 %tmp232, %tmp233
|
||||
br i1 %tmp234, label %ENDIF2594, label %ELSE2704
|
||||
|
||||
ELSE2704: ; preds = %ELSE2686
|
||||
%235 = fcmp oeq float %temp292.11, %180
|
||||
%236 = fcmp olt float %180, undef
|
||||
%237 = and i1 %235, %236
|
||||
br i1 %237, label %ENDIF2594, label %ELSE2740
|
||||
%tmp235 = fcmp oeq float %temp292.11, %tmp180
|
||||
%tmp236 = fcmp olt float %tmp180, undef
|
||||
%tmp237 = and i1 %tmp235, %tmp236
|
||||
br i1 %tmp237, label %ENDIF2594, label %ELSE2740
|
||||
|
||||
ELSE2740: ; preds = %ELSE2704
|
||||
br i1 undef, label %IF2757, label %ELSE2758
|
||||
@ -349,8 +350,8 @@ ELSE2761: ; preds = %IF2757
|
||||
br label %ENDIF2594
|
||||
|
||||
IF2775: ; preds = %ELSE2758
|
||||
%238 = fcmp olt float undef, undef
|
||||
br i1 %238, label %ENDIF2594, label %ELSE2779
|
||||
%tmp238 = fcmp olt float undef, undef
|
||||
br i1 %tmp238, label %ENDIF2594, label %ELSE2779
|
||||
|
||||
ELSE2779: ; preds = %IF2775
|
||||
br i1 undef, label %ENDIF2594, label %ELSE2782
|
||||
@ -359,39 +360,39 @@ ELSE2782: ; preds = %ELSE2779
|
||||
br i1 undef, label %ENDIF2594, label %ELSE2785
|
||||
|
||||
ELSE2785: ; preds = %ELSE2782
|
||||
%239 = fcmp olt float undef, 0.000000e+00
|
||||
br i1 %239, label %ENDIF2594, label %ELSE2788
|
||||
%tmp239 = fcmp olt float undef, 0.000000e+00
|
||||
br i1 %tmp239, label %ENDIF2594, label %ELSE2788
|
||||
|
||||
ELSE2788: ; preds = %ELSE2785
|
||||
%240 = fcmp olt float 0.000000e+00, undef
|
||||
%.2848 = select i1 %240, float -1.000000e+00, float 1.000000e+00
|
||||
%tmp240 = fcmp olt float 0.000000e+00, undef
|
||||
%.2848 = select i1 %tmp240, float -1.000000e+00, float 1.000000e+00
|
||||
br label %ENDIF2594
|
||||
|
||||
ELSE2797: ; preds = %ENDIF2564
|
||||
%241 = fsub float %8, undef
|
||||
%242 = fsub float %9, undef
|
||||
%243 = fsub float %10, undef
|
||||
%244 = fmul float %241, undef
|
||||
%245 = fmul float %242, undef
|
||||
%246 = fmul float %243, undef
|
||||
%247 = fsub float %11, undef
|
||||
%248 = fsub float %12, undef
|
||||
%249 = fsub float %13, undef
|
||||
%250 = fmul float %247, undef
|
||||
%251 = fmul float %248, undef
|
||||
%252 = fmul float %249, undef
|
||||
%253 = call float @llvm.minnum.f32(float %244, float %250)
|
||||
%254 = call float @llvm.minnum.f32(float %245, float %251)
|
||||
%255 = call float @llvm.maxnum.f32(float %246, float %252)
|
||||
%256 = call float @llvm.maxnum.f32(float %253, float %254)
|
||||
%257 = call float @llvm.maxnum.f32(float %256, float undef)
|
||||
%258 = call float @llvm.minnum.f32(float undef, float %255)
|
||||
%259 = fcmp ogt float %257, 0.000000e+00
|
||||
%260 = fcmp olt float %257, 1.000000e+00
|
||||
%261 = and i1 %259, %260
|
||||
%262 = fcmp olt float %257, %258
|
||||
%263 = and i1 %261, %262
|
||||
br i1 %263, label %ENDIF2795, label %ELSE2800
|
||||
%tmp241 = fsub float %tmp8, undef
|
||||
%tmp242 = fsub float %tmp9, undef
|
||||
%tmp243 = fsub float %tmp10, undef
|
||||
%tmp244 = fmul float %tmp241, undef
|
||||
%tmp245 = fmul float %tmp242, undef
|
||||
%tmp246 = fmul float %tmp243, undef
|
||||
%tmp247 = fsub float %tmp11, undef
|
||||
%tmp248 = fsub float %tmp12, undef
|
||||
%tmp249 = fsub float %tmp13, undef
|
||||
%tmp250 = fmul float %tmp247, undef
|
||||
%tmp251 = fmul float %tmp248, undef
|
||||
%tmp252 = fmul float %tmp249, undef
|
||||
%tmp253 = call float @llvm.minnum.f32(float %tmp244, float %tmp250)
|
||||
%tmp254 = call float @llvm.minnum.f32(float %tmp245, float %tmp251)
|
||||
%tmp255 = call float @llvm.maxnum.f32(float %tmp246, float %tmp252)
|
||||
%tmp256 = call float @llvm.maxnum.f32(float %tmp253, float %tmp254)
|
||||
%tmp257 = call float @llvm.maxnum.f32(float %tmp256, float undef)
|
||||
%tmp258 = call float @llvm.minnum.f32(float undef, float %tmp255)
|
||||
%tmp259 = fcmp ogt float %tmp257, 0.000000e+00
|
||||
%tmp260 = fcmp olt float %tmp257, 1.000000e+00
|
||||
%tmp261 = and i1 %tmp259, %tmp260
|
||||
%tmp262 = fcmp olt float %tmp257, %tmp258
|
||||
%tmp263 = and i1 %tmp261, %tmp262
|
||||
br i1 %tmp263, label %ENDIF2795, label %ELSE2800
|
||||
|
||||
ENDIF2795: ; preds = %ELSE2824, %ELSE2821, %ELSE2818, %ELSE2815, %ELSE2812, %ELSE2809, %ELSE2806, %ELSE2803, %ELSE2800, %ELSE2797, %ENDIF2564
|
||||
br label %LOOP
|
||||
@ -400,53 +401,53 @@ ELSE2800: ; preds = %ELSE2797
|
||||
br i1 undef, label %ENDIF2795, label %ELSE2803
|
||||
|
||||
ELSE2803: ; preds = %ELSE2800
|
||||
%264 = fsub float %20, undef
|
||||
%265 = fsub float %21, undef
|
||||
%266 = fsub float %22, undef
|
||||
%267 = fmul float %264, undef
|
||||
%268 = fmul float %265, undef
|
||||
%269 = fmul float %266, 0.000000e+00
|
||||
%270 = fsub float %23, undef
|
||||
%271 = fsub float %24, undef
|
||||
%272 = fsub float %25, undef
|
||||
%273 = fmul float %270, undef
|
||||
%274 = fmul float %271, undef
|
||||
%275 = fmul float %272, undef
|
||||
%276 = call float @llvm.minnum.f32(float %267, float %273)
|
||||
%277 = call float @llvm.maxnum.f32(float %268, float %274)
|
||||
%278 = call float @llvm.maxnum.f32(float %269, float %275)
|
||||
%279 = call float @llvm.maxnum.f32(float %276, float undef)
|
||||
%280 = call float @llvm.maxnum.f32(float %279, float undef)
|
||||
%281 = call float @llvm.minnum.f32(float undef, float %277)
|
||||
%282 = call float @llvm.minnum.f32(float %281, float %278)
|
||||
%283 = fcmp ogt float %280, 0.000000e+00
|
||||
%284 = fcmp olt float %280, 1.000000e+00
|
||||
%285 = and i1 %283, %284
|
||||
%286 = fcmp olt float %280, %282
|
||||
%287 = and i1 %285, %286
|
||||
br i1 %287, label %ENDIF2795, label %ELSE2806
|
||||
%tmp264 = fsub float %tmp20, undef
|
||||
%tmp265 = fsub float %tmp21, undef
|
||||
%tmp266 = fsub float %tmp22, undef
|
||||
%tmp267 = fmul float %tmp264, undef
|
||||
%tmp268 = fmul float %tmp265, undef
|
||||
%tmp269 = fmul float %tmp266, 0.000000e+00
|
||||
%tmp270 = fsub float %tmp23, undef
|
||||
%tmp271 = fsub float %tmp24, undef
|
||||
%tmp272 = fsub float %tmp25, undef
|
||||
%tmp273 = fmul float %tmp270, undef
|
||||
%tmp274 = fmul float %tmp271, undef
|
||||
%tmp275 = fmul float %tmp272, undef
|
||||
%tmp276 = call float @llvm.minnum.f32(float %tmp267, float %tmp273)
|
||||
%tmp277 = call float @llvm.maxnum.f32(float %tmp268, float %tmp274)
|
||||
%tmp278 = call float @llvm.maxnum.f32(float %tmp269, float %tmp275)
|
||||
%tmp279 = call float @llvm.maxnum.f32(float %tmp276, float undef)
|
||||
%tmp280 = call float @llvm.maxnum.f32(float %tmp279, float undef)
|
||||
%tmp281 = call float @llvm.minnum.f32(float undef, float %tmp277)
|
||||
%tmp282 = call float @llvm.minnum.f32(float %tmp281, float %tmp278)
|
||||
%tmp283 = fcmp ogt float %tmp280, 0.000000e+00
|
||||
%tmp284 = fcmp olt float %tmp280, 1.000000e+00
|
||||
%tmp285 = and i1 %tmp283, %tmp284
|
||||
%tmp286 = fcmp olt float %tmp280, %tmp282
|
||||
%tmp287 = and i1 %tmp285, %tmp286
|
||||
br i1 %tmp287, label %ENDIF2795, label %ELSE2806
|
||||
|
||||
ELSE2806: ; preds = %ELSE2803
|
||||
%288 = fsub float %26, undef
|
||||
%289 = fsub float %27, undef
|
||||
%290 = fsub float %28, undef
|
||||
%291 = fmul float %288, undef
|
||||
%292 = fmul float %289, 0.000000e+00
|
||||
%293 = fmul float %290, undef
|
||||
%294 = fsub float %29, undef
|
||||
%295 = fmul float %294, undef
|
||||
%296 = call float @llvm.minnum.f32(float %291, float %295)
|
||||
%297 = call float @llvm.minnum.f32(float %292, float undef)
|
||||
%298 = call float @llvm.maxnum.f32(float %293, float undef)
|
||||
%299 = call float @llvm.maxnum.f32(float %296, float %297)
|
||||
%300 = call float @llvm.maxnum.f32(float %299, float undef)
|
||||
%301 = call float @llvm.minnum.f32(float undef, float %298)
|
||||
%302 = fcmp ogt float %300, 0.000000e+00
|
||||
%303 = fcmp olt float %300, 1.000000e+00
|
||||
%304 = and i1 %302, %303
|
||||
%305 = fcmp olt float %300, %301
|
||||
%306 = and i1 %304, %305
|
||||
br i1 %306, label %ENDIF2795, label %ELSE2809
|
||||
%tmp288 = fsub float %tmp26, undef
|
||||
%tmp289 = fsub float %tmp27, undef
|
||||
%tmp290 = fsub float %tmp28, undef
|
||||
%tmp291 = fmul float %tmp288, undef
|
||||
%tmp292 = fmul float %tmp289, 0.000000e+00
|
||||
%tmp293 = fmul float %tmp290, undef
|
||||
%tmp294 = fsub float %tmp29, undef
|
||||
%tmp295 = fmul float %tmp294, undef
|
||||
%tmp296 = call float @llvm.minnum.f32(float %tmp291, float %tmp295)
|
||||
%tmp297 = call float @llvm.minnum.f32(float %tmp292, float undef)
|
||||
%tmp298 = call float @llvm.maxnum.f32(float %tmp293, float undef)
|
||||
%tmp299 = call float @llvm.maxnum.f32(float %tmp296, float %tmp297)
|
||||
%tmp300 = call float @llvm.maxnum.f32(float %tmp299, float undef)
|
||||
%tmp301 = call float @llvm.minnum.f32(float undef, float %tmp298)
|
||||
%tmp302 = fcmp ogt float %tmp300, 0.000000e+00
|
||||
%tmp303 = fcmp olt float %tmp300, 1.000000e+00
|
||||
%tmp304 = and i1 %tmp302, %tmp303
|
||||
%tmp305 = fcmp olt float %tmp300, %tmp301
|
||||
%tmp306 = and i1 %tmp304, %tmp305
|
||||
br i1 %tmp306, label %ENDIF2795, label %ELSE2809
|
||||
|
||||
ELSE2809: ; preds = %ELSE2806
|
||||
br i1 undef, label %ENDIF2795, label %ELSE2812
|
||||
@ -461,53 +462,42 @@ ELSE2818: ; preds = %ELSE2815
|
||||
br i1 undef, label %ENDIF2795, label %ELSE2821
|
||||
|
||||
ELSE2821: ; preds = %ELSE2818
|
||||
%307 = fsub float %56, undef
|
||||
%308 = fsub float %57, undef
|
||||
%309 = fsub float %58, undef
|
||||
%310 = fmul float %307, undef
|
||||
%311 = fmul float %308, 0.000000e+00
|
||||
%312 = fmul float %309, undef
|
||||
%313 = fsub float %59, undef
|
||||
%314 = fsub float %60, undef
|
||||
%315 = fsub float %61, undef
|
||||
%316 = fmul float %313, undef
|
||||
%317 = fmul float %314, undef
|
||||
%318 = fmul float %315, undef
|
||||
%319 = call float @llvm.maxnum.f32(float %310, float %316)
|
||||
%320 = call float @llvm.maxnum.f32(float %311, float %317)
|
||||
%321 = call float @llvm.maxnum.f32(float %312, float %318)
|
||||
%322 = call float @llvm.minnum.f32(float %319, float %320)
|
||||
%323 = call float @llvm.minnum.f32(float %322, float %321)
|
||||
%324 = fcmp ogt float undef, 0.000000e+00
|
||||
%325 = fcmp olt float undef, 1.000000e+00
|
||||
%326 = and i1 %324, %325
|
||||
%327 = fcmp olt float undef, %323
|
||||
%328 = and i1 %326, %327
|
||||
br i1 %328, label %ENDIF2795, label %ELSE2824
|
||||
%tmp307 = fsub float %tmp56, undef
|
||||
%tmp308 = fsub float %tmp57, undef
|
||||
%tmp309 = fsub float %tmp58, undef
|
||||
%tmp310 = fmul float %tmp307, undef
|
||||
%tmp311 = fmul float %tmp308, 0.000000e+00
|
||||
%tmp312 = fmul float %tmp309, undef
|
||||
%tmp313 = fsub float %tmp59, undef
|
||||
%tmp314 = fsub float %tmp60, undef
|
||||
%tmp315 = fsub float %tmp61, undef
|
||||
%tmp316 = fmul float %tmp313, undef
|
||||
%tmp317 = fmul float %tmp314, undef
|
||||
%tmp318 = fmul float %tmp315, undef
|
||||
%tmp319 = call float @llvm.maxnum.f32(float %tmp310, float %tmp316)
|
||||
%tmp320 = call float @llvm.maxnum.f32(float %tmp311, float %tmp317)
|
||||
%tmp321 = call float @llvm.maxnum.f32(float %tmp312, float %tmp318)
|
||||
%tmp322 = call float @llvm.minnum.f32(float %tmp319, float %tmp320)
|
||||
%tmp323 = call float @llvm.minnum.f32(float %tmp322, float %tmp321)
|
||||
%tmp324 = fcmp ogt float undef, 0.000000e+00
|
||||
%tmp325 = fcmp olt float undef, 1.000000e+00
|
||||
%tmp326 = and i1 %tmp324, %tmp325
|
||||
%tmp327 = fcmp olt float undef, %tmp323
|
||||
%tmp328 = and i1 %tmp326, %tmp327
|
||||
br i1 %tmp328, label %ENDIF2795, label %ELSE2824
|
||||
|
||||
ELSE2824: ; preds = %ELSE2821
|
||||
%.2849 = select i1 undef, float 0.000000e+00, float 1.000000e+00
|
||||
br label %ENDIF2795
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare float @llvm.floor.f32(float) #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare float @llvm.sqrt.f32(float) #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare float @llvm.minnum.f32(float, float) #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare float @llvm.maxnum.f32(float, float) #1
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
@ -1,16 +1,16 @@
|
||||
; RUN: llc < %s -march=amdgcn -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=SIVI %s
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=bonaire -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=CI --check-prefix=GCN %s
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=SIVI %s
|
||||
; RUN: llc -march=amdgcn -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SIVI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=SIVI %s
|
||||
|
||||
; SMRD load with an immediate offset.
|
||||
; GCN-LABEL: {{^}}smrd0:
|
||||
; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
|
||||
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4
|
||||
define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
|
||||
define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
|
||||
entry:
|
||||
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 1
|
||||
%1 = load i32, i32 addrspace(2)* %0
|
||||
store i32 %1, i32 addrspace(1)* %out
|
||||
%tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 1
|
||||
%tmp1 = load i32, i32 addrspace(2)* %tmp
|
||||
store i32 %tmp1, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -18,11 +18,11 @@ entry:
|
||||
; GCN-LABEL: {{^}}smrd1:
|
||||
; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff,0x{{[0-9]+[137]}}
|
||||
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
|
||||
define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
|
||||
define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
|
||||
entry:
|
||||
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 255
|
||||
%1 = load i32, i32 addrspace(2)* %0
|
||||
store i32 %1, i32 addrspace(1)* %out
|
||||
%tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 255
|
||||
%tmp1 = load i32, i32 addrspace(2)* %tmp
|
||||
store i32 %tmp1, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -33,11 +33,11 @@ entry:
|
||||
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
|
||||
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
|
||||
; GCN: s_endpgm
|
||||
define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
|
||||
define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
|
||||
entry:
|
||||
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 256
|
||||
%1 = load i32, i32 addrspace(2)* %0
|
||||
store i32 %1, i32 addrspace(1)* %out
|
||||
%tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 256
|
||||
%tmp1 = load i32, i32 addrspace(2)* %tmp
|
||||
store i32 %tmp1, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -48,11 +48,11 @@ entry:
|
||||
; SI: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0xb ; encoding: [0x0b
|
||||
; TODO: Add VI checks
|
||||
; GCN: s_endpgm
|
||||
define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
|
||||
define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
|
||||
entry:
|
||||
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32
|
||||
%1 = load i32, i32 addrspace(2)* %0
|
||||
store i32 %1, i32 addrspace(1)* %out
|
||||
%tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 4294967296
|
||||
%tmp1 = load i32, i32 addrspace(2)* %tmp
|
||||
store i32 %tmp1, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -62,11 +62,11 @@ entry:
|
||||
; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
|
||||
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
|
||||
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
|
||||
define void @smrd4(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
|
||||
define void @smrd4(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
|
||||
entry:
|
||||
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262143
|
||||
%1 = load i32, i32 addrspace(2)* %0
|
||||
store i32 %1, i32 addrspace(1)* %out
|
||||
%tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 262143
|
||||
%tmp1 = load i32, i32 addrspace(2)* %tmp
|
||||
store i32 %tmp1, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -76,11 +76,11 @@ entry:
|
||||
; SIVI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
|
||||
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
|
||||
; GCN: s_endpgm
|
||||
define void @smrd5(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
|
||||
define void @smrd5(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
|
||||
entry:
|
||||
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262144
|
||||
%1 = load i32, i32 addrspace(2)* %0
|
||||
store i32 %1, i32 addrspace(1)* %out
|
||||
%tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 262144
|
||||
%tmp1 = load i32, i32 addrspace(2)* %tmp
|
||||
store i32 %tmp1, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -88,12 +88,12 @@ entry:
|
||||
; GCN-LABEL: {{^}}smrd_load_const0:
|
||||
; SICI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
|
||||
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x10
|
||||
define amdgpu_ps void @smrd_load_const0(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) {
|
||||
define amdgpu_ps void @smrd_load_const0(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
main_body:
|
||||
%20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
|
||||
%21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
|
||||
%22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
|
||||
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp
|
||||
%tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 16)
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -102,14 +102,15 @@ main_body:
|
||||
; GCN-LABEL: {{^}}smrd_load_const1:
|
||||
; SICI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
|
||||
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
|
||||
define amdgpu_ps void @smrd_load_const1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) {
|
||||
define amdgpu_ps void @smrd_load_const1(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
main_body:
|
||||
%20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
|
||||
%21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
|
||||
%22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 1020)
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
|
||||
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp
|
||||
%tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 1020)
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
; SMRD load using the load.const intrinsic with an offset greater than the
|
||||
; largest possible immediate.
|
||||
; immediate offset.
|
||||
@ -118,12 +119,12 @@ main_body:
|
||||
; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
|
||||
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
|
||||
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
|
||||
define amdgpu_ps void @smrd_load_const2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) {
|
||||
define amdgpu_ps void @smrd_load_const2(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
main_body:
|
||||
%20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
|
||||
%21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
|
||||
%22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 1024)
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
|
||||
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp
|
||||
%tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 1024)
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -133,12 +134,12 @@ main_body:
|
||||
; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
|
||||
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
|
||||
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
|
||||
define amdgpu_ps void @smrd_load_const3(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) {
|
||||
define amdgpu_ps void @smrd_load_const3(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
main_body:
|
||||
%20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
|
||||
%21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
|
||||
%22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 1048572)
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
|
||||
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp
|
||||
%tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 1048572)
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -148,18 +149,17 @@ main_body:
|
||||
; SIVI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
|
||||
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_ps void @smrd_load_const4(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) {
|
||||
define amdgpu_ps void @smrd_load_const4(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
main_body:
|
||||
%20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
|
||||
%21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
|
||||
%22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 1048576)
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
|
||||
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp
|
||||
%tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 1048576)
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare float @llvm.SI.load.const(<16 x i8>, i32) #0
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
@ -107,7 +107,7 @@ endif: ; preds = %else, %if
|
||||
%export = phi float [ %lds_data, %if ], [ %interp, %else ]
|
||||
%tmp4 = call i32 @llvm.SI.packf16(float %export, float %export)
|
||||
%tmp5 = bitcast i32 %tmp4 to float
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp5, float %tmp5, float %tmp5, float %tmp5)
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp5, float %tmp5, float %tmp5, float %tmp5, i1 true, i1 true) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -205,11 +205,9 @@ ret:
|
||||
ret void
|
||||
}
|
||||
|
||||
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #0
|
||||
|
||||
declare i32 @llvm.SI.packf16(float, float) readnone
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||
declare i32 @llvm.SI.packf16(float, float) #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
@ -1,11 +1,11 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; FIXME: Move this to sgpr-copy.ll when this is fixed on VI.
|
||||
; Make sure that when we split an smrd instruction in order to move it to
|
||||
; the VALU, we are also moving its users to the VALU.
|
||||
; CHECK-LABEL: {{^}}split_smrd_add_worklist:
|
||||
; CHECK: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
|
||||
|
||||
; GCN-LABEL: {{^}}split_smrd_add_worklist:
|
||||
; GCN: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
|
||||
define amdgpu_ps void @split_smrd_add_worklist([34 x <8 x i32>] addrspace(2)* byval %arg) #0 {
|
||||
bb:
|
||||
%tmp = call float @llvm.SI.load.const(<16 x i8> undef, i32 96)
|
||||
@ -24,24 +24,20 @@ bb3: ; preds = %bb
|
||||
%tmp9 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 1061158912, i32 1048576000>, <8 x i32> %tmp8, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
|
||||
%tmp10 = extractelement <4 x float> %tmp9, i32 0
|
||||
%tmp12 = call i32 @llvm.SI.packf16(float %tmp10, float undef)
|
||||
%tmp13 = bitcast i32 %tmp12 to float
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float undef, float %tmp13, float undef, float undef)
|
||||
%tmp13 = bitcast i32 %tmp12 to <2 x half>
|
||||
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp13, <2 x half> undef, i1 true, i1 true) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
|
||||
|
||||
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
|
||||
declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
|
||||
|
||||
declare i32 @llvm.SI.packf16(float, float) #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
||||
!0 = !{!1, !1, i64 0, i32 1}
|
||||
!1 = !{!"const", !3}
|
||||
!2 = !{!1, !1, i64 0}
|
||||
!3 = !{!"tbaa root"}
|
||||
!1 = !{!"const", !2}
|
||||
!2 = !{!"tbaa root"}
|
||||
|
@ -1,39 +1,37 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -o - %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -o - %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; SI-LABEL:{{^}}row_filter_C1_D0:
|
||||
; SI: s_endpgm
|
||||
; Function Attrs: nounwind
|
||||
; GCN-LABEL:{{^}}row_filter_C1_D0:
|
||||
define void @row_filter_C1_D0() {
|
||||
entry:
|
||||
br i1 undef, label %for.inc.1, label %do.body.preheader
|
||||
|
||||
do.body.preheader: ; preds = %entry
|
||||
%0 = insertelement <4 x i32> zeroinitializer, i32 undef, i32 1
|
||||
%tmp = insertelement <4 x i32> zeroinitializer, i32 undef, i32 1
|
||||
br i1 undef, label %do.body56.1, label %do.body90
|
||||
|
||||
do.body90: ; preds = %do.body56.2, %do.body56.1, %do.body.preheader
|
||||
%1 = phi <4 x i32> [ %6, %do.body56.2 ], [ %5, %do.body56.1 ], [ %0, %do.body.preheader ]
|
||||
%2 = insertelement <4 x i32> %1, i32 undef, i32 2
|
||||
%3 = insertelement <4 x i32> %2, i32 undef, i32 3
|
||||
%tmp1 = phi <4 x i32> [ %tmp6, %do.body56.2 ], [ %tmp5, %do.body56.1 ], [ %tmp, %do.body.preheader ]
|
||||
%tmp2 = insertelement <4 x i32> %tmp1, i32 undef, i32 2
|
||||
%tmp3 = insertelement <4 x i32> %tmp2, i32 undef, i32 3
|
||||
br i1 undef, label %do.body124.1, label %do.body.1562.preheader
|
||||
|
||||
do.body.1562.preheader: ; preds = %do.body124.1, %do.body90
|
||||
%storemerge = phi <4 x i32> [ %3, %do.body90 ], [ %7, %do.body124.1 ]
|
||||
%4 = insertelement <4 x i32> undef, i32 undef, i32 1
|
||||
%storemerge = phi <4 x i32> [ %tmp3, %do.body90 ], [ %tmp7, %do.body124.1 ]
|
||||
%tmp4 = insertelement <4 x i32> undef, i32 undef, i32 1
|
||||
br label %for.inc.1
|
||||
|
||||
do.body56.1: ; preds = %do.body.preheader
|
||||
%5 = insertelement <4 x i32> %0, i32 undef, i32 1
|
||||
%tmp5 = insertelement <4 x i32> %tmp, i32 undef, i32 1
|
||||
%or.cond472.1 = or i1 undef, undef
|
||||
br i1 %or.cond472.1, label %do.body56.2, label %do.body90
|
||||
|
||||
do.body56.2: ; preds = %do.body56.1
|
||||
%6 = insertelement <4 x i32> %5, i32 undef, i32 1
|
||||
%tmp6 = insertelement <4 x i32> %tmp5, i32 undef, i32 1
|
||||
br label %do.body90
|
||||
|
||||
do.body124.1: ; preds = %do.body90
|
||||
%7 = insertelement <4 x i32> %3, i32 undef, i32 3
|
||||
%tmp7 = insertelement <4 x i32> %tmp3, i32 undef, i32 3
|
||||
br label %do.body.1562.preheader
|
||||
|
||||
for.inc.1: ; preds = %do.body.1562.preheader, %entry
|
||||
@ -42,8 +40,8 @@ for.inc.1: ; preds = %do.body.1562.prehea
|
||||
unreachable
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}foo:
|
||||
; SI: s_endpgm
|
||||
; GCN-LABEL: {{^}}foo:
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_ps void @foo() #0 {
|
||||
bb:
|
||||
br i1 undef, label %bb2, label %bb1
|
||||
@ -78,9 +76,9 @@ bb13: ; preds = %bb2
|
||||
bb14: ; preds = %bb27, %bb24, %bb9
|
||||
%tmp15 = phi float [ %tmp12, %bb9 ], [ undef, %bb27 ], [ 0.000000e+00, %bb24 ]
|
||||
%tmp16 = phi float [ %tmp11, %bb9 ], [ undef, %bb27 ], [ %tmp25, %bb24 ]
|
||||
%tmp17 = fmul float 10.5, %tmp16
|
||||
%tmp18 = fmul float 11.5, %tmp15
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp18, float %tmp17, float %tmp17, float %tmp17)
|
||||
%tmp17 = fmul float 1.050000e+01, %tmp16
|
||||
%tmp18 = fmul float 1.150000e+01, %tmp15
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp18, float %tmp17, float %tmp17, float %tmp17, i1 true, i1 true) #0
|
||||
ret void
|
||||
|
||||
bb23: ; preds = %bb13
|
||||
@ -97,13 +95,8 @@ bb27: ; preds = %bb24
|
||||
br label %bb14
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||
declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.SI.packf16(float, float) #1
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
@ -5,17 +5,19 @@
|
||||
; FUNC-LABEL: {{^}}udiv_i32:
|
||||
; EG-NOT: SETGE_INT
|
||||
; EG: CF_END
|
||||
|
||||
; SI: v_rcp_iflag_f32_e32
|
||||
define void @udiv_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
|
||||
%b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
|
||||
%a = load i32, i32 addrspace(1) * %in
|
||||
%b = load i32, i32 addrspace(1) * %b_ptr
|
||||
%a = load i32, i32 addrspace(1)* %in
|
||||
%b = load i32, i32 addrspace(1)* %b_ptr
|
||||
%result = udiv i32 %a, %b
|
||||
store i32 %result, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_udiv_i32:
|
||||
|
||||
; SI: v_rcp_iflag_f32_e32
|
||||
define void @s_udiv_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
|
||||
%result = udiv i32 %a, %b
|
||||
store i32 %result, i32 addrspace(1)* %out
|
||||
@ -30,6 +32,8 @@ define void @s_udiv_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
|
||||
; FUNC-LABEL: {{^}}udiv_v2i32:
|
||||
; EG: CF_END
|
||||
|
||||
; SI: v_rcp_iflag_f32_e32
|
||||
; SI: v_rcp_iflag_f32_e32
|
||||
; SI: s_endpgm
|
||||
define void @udiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
|
||||
%b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
|
||||
@ -158,3 +162,21 @@ define void @scalarize_mulhu_4xi32(<4 x i32> addrspace(1)* nocapture readonly %i
|
||||
store <4 x i32> %2, <4 x i32> addrspace(1)* %out, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}test_udiv2:
|
||||
; SI: s_lshr_b32 s{{[0-9]}}, s{{[0-9]}}, 1
|
||||
define void @test_udiv2(i32 %p) {
|
||||
%i = udiv i32 %p, 2
|
||||
store volatile i32 %i, i32 addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}test_udiv_3_mulhu:
|
||||
; SI: v_mov_b32_e32 v{{[0-9]+}}, 0xaaaaaaab
|
||||
; SI: v_mul_hi_u32 v0, {{v[0-9]+}}, {{s[0-9]+}}
|
||||
; SI-NEXT: v_lshrrev_b32_e32 v0, 1, v0
|
||||
define void @test_udiv_3_mulhu(i32 %p) {
|
||||
%i = udiv i32 %p, 3
|
||||
store volatile i32 %i, i32 addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
@ -1,13 +0,0 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
; CHECK: v_rcp_iflag_f32_e32
|
||||
|
||||
define void @test(i32 %p, i32 %q) {
|
||||
%i = udiv i32 %p, %q
|
||||
%r = bitcast i32 %i to float
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
@ -179,39 +179,39 @@ bb24: ; preds = %bb157, %bb
|
||||
br i1 %tmp155, label %bb156, label %bb157
|
||||
|
||||
bb156: ; preds = %bb24
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %tmp12, float %tmp103, float %tmp102, float %tmp101)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %tmp99, float %tmp98, float %tmp97, float %tmp95)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %tmp94, float %tmp93, float %tmp91, float %tmp90)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %tmp89, float %tmp87, float %tmp86, float %tmp85)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %tmp83, float %tmp82, float %tmp81, float %tmp79)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %tmp78, float %tmp77, float %tmp75, float %tmp74)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %tmp73, float %tmp71, float %tmp70, float %tmp69)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 39, i32 0, float %tmp67, float %tmp66, float %tmp65, float %tmp63)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 40, i32 0, float %tmp62, float %tmp61, float %tmp59, float %tmp58)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 41, i32 0, float %tmp57, float %tmp55, float %tmp54, float %tmp53)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 42, i32 0, float %tmp51, float %tmp50, float %tmp49, float %tmp47)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 43, i32 0, float %tmp46, float %tmp45, float %tmp43, float %tmp42)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 44, i32 0, float %tmp41, float %tmp39, float %tmp38, float %tmp37)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 45, i32 0, float %tmp35, float %tmp34, float %tmp33, float %tmp31)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 46, i32 0, float %tmp30, float %tmp29, float %tmp27, float %tmp26)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 47, i32 0, float %tmp25, float %tmp28, float %tmp32, float %tmp36)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 48, i32 0, float %tmp40, float %tmp44, float %tmp48, float %tmp52)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 49, i32 0, float %tmp56, float %tmp60, float %tmp64, float %tmp68)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 50, i32 0, float %tmp72, float %tmp76, float %tmp80, float %tmp84)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 51, i32 0, float %tmp88, float %tmp92, float %tmp96, float %tmp100)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 52, i32 0, float %tmp104, float %tmp105, float %tmp106, float %tmp108)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 53, i32 0, float %tmp109, float %tmp110, float %tmp111, float %tmp112)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 54, i32 0, float %tmp113, float %tmp114, float %tmp115, float %tmp116)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 55, i32 0, float %tmp117, float %tmp118, float %tmp119, float %tmp120)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 56, i32 0, float %tmp121, float %tmp122, float %tmp123, float %tmp124)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 57, i32 0, float %tmp125, float %tmp126, float %tmp127, float %tmp128)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 58, i32 0, float %tmp129, float %tmp130, float %tmp131, float %tmp132)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 59, i32 0, float %tmp133, float %tmp134, float %tmp135, float %tmp136)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 60, i32 0, float %tmp137, float %tmp138, float %tmp139, float %tmp140)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 61, i32 0, float %tmp141, float %tmp142, float %tmp143, float %tmp144)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 62, i32 0, float %tmp145, float %tmp146, float %tmp147, float %tmp148)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 63, i32 0, float %tmp149, float %tmp150, float %tmp151, float %tmp13)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %tmp19, float %tmp20, float %tmp21, float %tmp22)
|
||||
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %tmp12, float %tmp103, float %tmp102, float %tmp101, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %tmp99, float %tmp98, float %tmp97, float %tmp95, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 34, i32 15, float %tmp94, float %tmp93, float %tmp91, float %tmp90, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 35, i32 15, float %tmp89, float %tmp87, float %tmp86, float %tmp85, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 36, i32 15, float %tmp83, float %tmp82, float %tmp81, float %tmp79, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 37, i32 15, float %tmp78, float %tmp77, float %tmp75, float %tmp74, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 38, i32 15, float %tmp73, float %tmp71, float %tmp70, float %tmp69, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 39, i32 15, float %tmp67, float %tmp66, float %tmp65, float %tmp63, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 40, i32 15, float %tmp62, float %tmp61, float %tmp59, float %tmp58, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 41, i32 15, float %tmp57, float %tmp55, float %tmp54, float %tmp53, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 42, i32 15, float %tmp51, float %tmp50, float %tmp49, float %tmp47, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 43, i32 15, float %tmp46, float %tmp45, float %tmp43, float %tmp42, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 44, i32 15, float %tmp41, float %tmp39, float %tmp38, float %tmp37, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 45, i32 15, float %tmp35, float %tmp34, float %tmp33, float %tmp31, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 46, i32 15, float %tmp30, float %tmp29, float %tmp27, float %tmp26, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 47, i32 15, float %tmp25, float %tmp28, float %tmp32, float %tmp36, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 48, i32 15, float %tmp40, float %tmp44, float %tmp48, float %tmp52, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 49, i32 15, float %tmp56, float %tmp60, float %tmp64, float %tmp68, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 50, i32 15, float %tmp72, float %tmp76, float %tmp80, float %tmp84, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 51, i32 15, float %tmp88, float %tmp92, float %tmp96, float %tmp100, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 52, i32 15, float %tmp104, float %tmp105, float %tmp106, float %tmp108, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 53, i32 15, float %tmp109, float %tmp110, float %tmp111, float %tmp112, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 54, i32 15, float %tmp113, float %tmp114, float %tmp115, float %tmp116, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 55, i32 15, float %tmp117, float %tmp118, float %tmp119, float %tmp120, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 56, i32 15, float %tmp121, float %tmp122, float %tmp123, float %tmp124, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 57, i32 15, float %tmp125, float %tmp126, float %tmp127, float %tmp128, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 58, i32 15, float %tmp129, float %tmp130, float %tmp131, float %tmp132, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 59, i32 15, float %tmp133, float %tmp134, float %tmp135, float %tmp136, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 60, i32 15, float %tmp137, float %tmp138, float %tmp139, float %tmp140, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 61, i32 15, float %tmp141, float %tmp142, float %tmp143, float %tmp144, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 62, i32 15, float %tmp145, float %tmp146, float %tmp147, float %tmp148, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 63, i32 15, float %tmp149, float %tmp150, float %tmp151, float %tmp13, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %tmp19, float %tmp20, float %tmp21, float %tmp22, i1 true, i1 false) #0
|
||||
ret void
|
||||
|
||||
bb157: ; preds = %bb24
|
||||
@ -482,15 +482,11 @@ bb157: ; preds = %bb24
|
||||
br label %bb24
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
|
||||
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||
|
||||
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
|
||||
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
@ -11,7 +11,7 @@
|
||||
; DEFAULT: exp
|
||||
; DEFAULT: s_waitcnt lgkmcnt(0)
|
||||
; DEFAULT: s_endpgm
|
||||
define amdgpu_vs void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, <16 x i8> addrspace(2)* inreg %arg3, <16 x i8> addrspace(2)* inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, float addrspace(2)* inreg %constptr) {
|
||||
define amdgpu_vs void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, <16 x i8> addrspace(2)* inreg %arg3, <16 x i8> addrspace(2)* inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, float addrspace(2)* inreg %constptr) #0 {
|
||||
main_body:
|
||||
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg3, i32 0
|
||||
%tmp10 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
|
||||
@ -20,8 +20,7 @@ main_body:
|
||||
%tmp13 = extractelement <4 x float> %tmp11, i32 1
|
||||
call void @llvm.amdgcn.s.barrier() #1
|
||||
%tmp14 = extractelement <4 x float> %tmp11, i32 2
|
||||
; %tmp15 = extractelement <4 x float> %tmp11, i32 3
|
||||
%tmp15 = load float, float addrspace(2)* %constptr, align 4 ; Force waiting for expcnt and lgkmcnt
|
||||
%tmp15 = load float, float addrspace(2)* %constptr, align 4
|
||||
%tmp16 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg3, i32 1
|
||||
%tmp17 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp16, !tbaa !0
|
||||
%tmp18 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %tmp17, i32 0, i32 %arg6)
|
||||
@ -29,8 +28,8 @@ main_body:
|
||||
%tmp20 = extractelement <4 x float> %tmp18, i32 1
|
||||
%tmp21 = extractelement <4 x float> %tmp18, i32 2
|
||||
%tmp22 = extractelement <4 x float> %tmp18, i32 3
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %tmp19, float %tmp20, float %tmp21, float %tmp22)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %tmp12, float %tmp13, float %tmp14, float %tmp15)
|
||||
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %tmp19, float %tmp20, float %tmp21, float %tmp22, i1 false, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %tmp12, float %tmp13, float %tmp14, float %tmp15, i1 true, i1 false) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -44,40 +43,34 @@ main_body:
|
||||
; ILPMAX: s_waitcnt vmcnt(1)
|
||||
; ILPMAX: s_waitcnt vmcnt(0)
|
||||
; ILPMAX: s_endpgm
|
||||
|
||||
define amdgpu_vs void @main2([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)*
|
||||
byval, i32 inreg, i32 inreg, i32, i32, i32, i32) {
|
||||
define amdgpu_vs void @main2([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, [16 x <16 x i8>] addrspace(2)* byval %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {
|
||||
main_body:
|
||||
%11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
|
||||
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
|
||||
%13 = add i32 %5, %7
|
||||
%14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13)
|
||||
%15 = extractelement <4 x float> %14, i32 0
|
||||
%16 = extractelement <4 x float> %14, i32 1
|
||||
%17 = extractelement <4 x float> %14, i32 2
|
||||
%18 = extractelement <4 x float> %14, i32 3
|
||||
%19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
|
||||
%20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0
|
||||
%21 = add i32 %5, %7
|
||||
%22 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %21)
|
||||
%23 = extractelement <4 x float> %22, i32 0
|
||||
%24 = extractelement <4 x float> %22, i32 1
|
||||
%25 = extractelement <4 x float> %22, i32 2
|
||||
%26 = extractelement <4 x float> %22, i32 3
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %15, float %16, float %17, float %18)
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %23, float %24, float %25, float %26)
|
||||
%tmp = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %arg4, i64 0, i64 0
|
||||
%tmp11 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, align 16, !tbaa !0
|
||||
%tmp12 = add i32 %arg5, %arg7
|
||||
%tmp13 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %tmp11, i32 0, i32 %tmp12)
|
||||
%tmp14 = extractelement <4 x float> %tmp13, i32 0
|
||||
%tmp15 = extractelement <4 x float> %tmp13, i32 1
|
||||
%tmp16 = extractelement <4 x float> %tmp13, i32 2
|
||||
%tmp17 = extractelement <4 x float> %tmp13, i32 3
|
||||
%tmp18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %arg4, i64 0, i64 1
|
||||
%tmp19 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp18, align 16, !tbaa !0
|
||||
%tmp20 = add i32 %arg5, %arg7
|
||||
%tmp21 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %tmp19, i32 0, i32 %tmp20)
|
||||
%tmp22 = extractelement <4 x float> %tmp21, i32 0
|
||||
%tmp23 = extractelement <4 x float> %tmp21, i32 1
|
||||
%tmp24 = extractelement <4 x float> %tmp21, i32 2
|
||||
%tmp25 = extractelement <4 x float> %tmp21, i32 3
|
||||
call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %tmp14, float %tmp15, float %tmp16, float %tmp17, i1 true, i1 false) #0
|
||||
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %tmp22, float %tmp23, float %tmp24, float %tmp25, i1 false, i1 false) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
; Function Attrs: convergent nounwind
|
||||
declare void @llvm.amdgcn.s.barrier() #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #2
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { convergent nounwind }
|
||||
attributes #2 = { nounwind readnone }
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=SI
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=VI
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK -check-prefix=VI %s
|
||||
|
||||
; Check that WQM isn't triggered by image load/store intrinsics.
|
||||
;
|
||||
@ -25,9 +25,7 @@ main_body:
|
||||
%c.3 = extractelement <4 x i32> %c.2, i32 0
|
||||
%gep = getelementptr float, float addrspace(1)* %ptr, i32 %c.3
|
||||
%data = load float, float addrspace(1)* %gep
|
||||
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %data, float undef, float undef, float undef)
|
||||
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %data, float undef, float undef, float undef, i1 true, i1 true) #1
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -500,7 +498,7 @@ end:
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
|
||||
declare void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
|
||||
declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #1
|
||||
declare void @llvm.amdgcn.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1
|
||||
@ -512,8 +510,7 @@ declare <4 x float> @llvm.SI.image.sample.i32(i32, <8 x i32>, <4 x i32>, i32, i3
|
||||
declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #3
|
||||
declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #3
|
||||
|
||||
declare void @llvm.AMDGPU.kill(float)
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
declare void @llvm.AMDGPU.kill(float) #1
|
||||
|
||||
attributes #1 = { nounwind }
|
||||
attributes #2 = { nounwind readonly }
|
||||
|
@ -6,46 +6,51 @@
|
||||
|
||||
target triple = "amdgcn--"
|
||||
|
||||
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0
|
||||
declare <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #2
|
||||
|
||||
define amdgpu_vs void @wrapper(i32 inreg, i32) {
|
||||
define amdgpu_vs void @wrapper(i32 inreg %arg, i32 %arg1) {
|
||||
main_body:
|
||||
%2 = add i32 %1, %0
|
||||
%3 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> undef, i32 0, i32 %2)
|
||||
%4 = extractelement <4 x float> %3, i32 1
|
||||
%5 = fptosi float %4 to i32
|
||||
%6 = insertelement <2 x i32> undef, i32 %5, i32 1
|
||||
%tmp = add i32 %arg1, %arg
|
||||
%tmp2 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> undef, i32 0, i32 %tmp)
|
||||
%tmp3 = extractelement <4 x float> %tmp2, i32 1
|
||||
%tmp4 = fptosi float %tmp3 to i32
|
||||
%tmp5 = insertelement <2 x i32> undef, i32 %tmp4, i32 1
|
||||
br label %loop11.i
|
||||
|
||||
loop11.i: ; preds = %endif46.i, %main_body
|
||||
%7 = phi i32 [ 0, %main_body ], [ %15, %endif46.i ]
|
||||
%8 = icmp sgt i32 %7, 999
|
||||
br i1 %8, label %main.exit, label %if16.i
|
||||
%tmp6 = phi i32 [ 0, %main_body ], [ %tmp14, %endif46.i ]
|
||||
%tmp7 = icmp sgt i32 %tmp6, 999
|
||||
br i1 %tmp7, label %main.exit, label %if16.i
|
||||
|
||||
if16.i: ; preds = %loop11.i
|
||||
%9 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %6, <8 x i32> undef, i32 15, i1 true, i1 false, i1 false, i1 false)
|
||||
%10 = extractelement <4 x float> %9, i32 0
|
||||
%11 = fcmp ult float 0.000000e+00, %10
|
||||
br i1 %11, label %if28.i, label %endif46.i
|
||||
%tmp8 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %tmp5, <8 x i32> undef, i32 15, i1 true, i1 false, i1 false, i1 false)
|
||||
%tmp9 = extractelement <4 x float> %tmp8, i32 0
|
||||
%tmp10 = fcmp ult float 0.000000e+00, %tmp9
|
||||
br i1 %tmp10, label %if28.i, label %endif46.i
|
||||
|
||||
if28.i: ; preds = %if16.i
|
||||
%12 = bitcast float %10 to i32
|
||||
%13 = shl i32 %12, 16
|
||||
%14 = bitcast i32 %13 to float
|
||||
%tmp11 = bitcast float %tmp9 to i32
|
||||
%tmp12 = shl i32 %tmp11, 16
|
||||
%tmp13 = bitcast i32 %tmp12 to float
|
||||
br label %main.exit
|
||||
|
||||
endif46.i: ; preds = %if16.i
|
||||
%15 = add i32 %7, 1
|
||||
%tmp14 = add i32 %tmp6, 1
|
||||
br label %loop11.i
|
||||
|
||||
main.exit: ; preds = %if28.i, %loop11.i
|
||||
%16 = phi float [ %14, %if28.i ], [ 0x36F0800000000000, %loop11.i ]
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %16, float 0.000000e+00, float 0.000000e+00, float 0x36A0000000000000)
|
||||
%tmp15 = phi float [ %tmp13, %if28.i ], [ 0x36F0800000000000, %loop11.i ]
|
||||
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %tmp15, float 0.000000e+00, float 0.000000e+00, float 0x36A0000000000000, i1 false, i1 false) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { nounwind readonly }
|
||||
attributes #2 = { nounwind }
|
||||
; Function Attrs: nounwind
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
|
||||
|
||||
; Function Attrs: nounwind readonly
|
||||
declare <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
attributes #2 = { nounwind readonly }
|
||||
|
Loading…
Reference in New Issue
Block a user