
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel

Currently, functions with the default C calling convention are treated
the same as compute kernels. Make this explicit so the default
calling convention can be changed to a non-kernel.

Converted with perl -pi -e 's/define void/define amdgpu_kernel void/'
on the relevant test directories (and undone in the one place that
actually wanted a non-kernel).
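
For reference, a sketch of the conversion as a full shell invocation; the
exact directory list is an assumption, since the message above only says
"the relevant test directories":

  # Hypothetical invocation: apply the substitution across the AMDGPU test
  # trees, then hand-revert the one test that actually wanted a non-kernel.
  find test/CodeGen/AMDGPU test/Analysis/CostModel/AMDGPU \
       test/Analysis/DivergenceAnalysis/AMDGPU \
       \( -name '*.ll' -o -name '*.mir' \) -type f |
    xargs perl -pi -e 's/define void/define amdgpu_kernel void/'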

llvm-svn: 298444
Matt Arsenault 2017-03-21 21:39:51 +00:00
parent 5a93187aed
commit dd9ab77318
714 changed files with 6248 additions and 6248 deletions

View File

@@ -3,7 +3,7 @@
 ; CHECK: 'add_i32'
 ; CHECK: estimated cost of 1 for {{.*}} add i32
-define void @add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
+define amdgpu_kernel void @add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
 %vec = load i32, i32 addrspace(1)* %vaddr
 %add = add i32 %vec, %b
 store i32 %add, i32 addrspace(1)* %out
@@ -12,7 +12,7 @@ define void @add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #
 ; CHECK: 'add_v2i32'
 ; CHECK: estimated cost of 2 for {{.*}} add <2 x i32>
-define void @add_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %vaddr, <2 x i32> %b) #0 {
+define amdgpu_kernel void @add_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %vaddr, <2 x i32> %b) #0 {
 %vec = load <2 x i32>, <2 x i32> addrspace(1)* %vaddr
 %add = add <2 x i32> %vec, %b
 store <2 x i32> %add, <2 x i32> addrspace(1)* %out
@@ -21,7 +21,7 @@ define void @add_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %va
 ; CHECK: 'add_v3i32'
 ; CHECK: estimated cost of 3 for {{.*}} add <3 x i32>
-define void @add_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %vaddr, <3 x i32> %b) #0 {
+define amdgpu_kernel void @add_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %vaddr, <3 x i32> %b) #0 {
 %vec = load <3 x i32>, <3 x i32> addrspace(1)* %vaddr
 %add = add <3 x i32> %vec, %b
 store <3 x i32> %add, <3 x i32> addrspace(1)* %out
@@ -30,7 +30,7 @@ define void @add_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %va
 ; CHECK: 'add_v4i32'
 ; CHECK: estimated cost of 4 for {{.*}} add <4 x i32>
-define void @add_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %vaddr, <4 x i32> %b) #0 {
+define amdgpu_kernel void @add_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %vaddr, <4 x i32> %b) #0 {
 %vec = load <4 x i32>, <4 x i32> addrspace(1)* %vaddr
 %add = add <4 x i32> %vec, %b
 store <4 x i32> %add, <4 x i32> addrspace(1)* %out
@@ -39,7 +39,7 @@ define void @add_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %va
 ; CHECK: 'add_i64'
 ; CHECK: estimated cost of 2 for {{.*}} add i64
-define void @add_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 {
+define amdgpu_kernel void @add_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 {
 %vec = load i64, i64 addrspace(1)* %vaddr
 %add = add i64 %vec, %b
 store i64 %add, i64 addrspace(1)* %out
@@ -48,7 +48,7 @@ define void @add_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #
 ; CHECK: 'add_v2i64'
 ; CHECK: estimated cost of 4 for {{.*}} add <2 x i64>
-define void @add_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %vaddr, <2 x i64> %b) #0 {
+define amdgpu_kernel void @add_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %vaddr, <2 x i64> %b) #0 {
 %vec = load <2 x i64>, <2 x i64> addrspace(1)* %vaddr
 %add = add <2 x i64> %vec, %b
 store <2 x i64> %add, <2 x i64> addrspace(1)* %out
@@ -57,7 +57,7 @@ define void @add_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %va
 ; CHECK: 'add_v3i64'
 ; CHECK: estimated cost of 6 for {{.*}} add <3 x i64>
-define void @add_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> addrspace(1)* %vaddr, <3 x i64> %b) #0 {
+define amdgpu_kernel void @add_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> addrspace(1)* %vaddr, <3 x i64> %b) #0 {
 %vec = load <3 x i64>, <3 x i64> addrspace(1)* %vaddr
 %add = add <3 x i64> %vec, %b
 store <3 x i64> %add, <3 x i64> addrspace(1)* %out
@@ -66,7 +66,7 @@ define void @add_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> addrspace(1)* %va
 ; CHECK: 'add_v4i64'
 ; CHECK: estimated cost of 8 for {{.*}} add <4 x i64>
-define void @add_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %vaddr, <4 x i64> %b) #0 {
+define amdgpu_kernel void @add_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %vaddr, <4 x i64> %b) #0 {
 %vec = load <4 x i64>, <4 x i64> addrspace(1)* %vaddr
 %add = add <4 x i64> %vec, %b
 store <4 x i64> %add, <4 x i64> addrspace(1)* %out
@@ -75,7 +75,7 @@ define void @add_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %va
 ; CHECK: 'add_v16i64'
 ; CHECK: estimated cost of 32 for {{.*}} add <16 x i64>
-define void @add_v16i64(<16 x i64> addrspace(1)* %out, <16 x i64> addrspace(1)* %vaddr, <16 x i64> %b) #0 {
+define amdgpu_kernel void @add_v16i64(<16 x i64> addrspace(1)* %out, <16 x i64> addrspace(1)* %vaddr, <16 x i64> %b) #0 {
 %vec = load <16 x i64>, <16 x i64> addrspace(1)* %vaddr
 %add = add <16 x i64> %vec, %b
 store <16 x i64> %add, <16 x i64> addrspace(1)* %out
@@ -84,7 +84,7 @@ define void @add_v16i64(<16 x i64> addrspace(1)* %out, <16 x i64> addrspace(1)*
 ; CHECK: 'add_i16'
 ; CHECK: estimated cost of 1 for {{.*}} add i16
-define void @add_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %vaddr, i16 %b) #0 {
+define amdgpu_kernel void @add_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %vaddr, i16 %b) #0 {
 %vec = load i16, i16 addrspace(1)* %vaddr
 %add = add i16 %vec, %b
 store i16 %add, i16 addrspace(1)* %out
@@ -93,7 +93,7 @@ define void @add_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %vaddr, i16 %b) #
 ; CHECK: 'add_v2i16'
 ; CHECK: estimated cost of 2 for {{.*}} add <2 x i16>
-define void @add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, <2 x i16> %b) #0 {
+define amdgpu_kernel void @add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, <2 x i16> %b) #0 {
 %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr
 %add = add <2 x i16> %vec, %b
 store <2 x i16> %add, <2 x i16> addrspace(1)* %out
@@ -102,7 +102,7 @@ define void @add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %va
 ; CHECK: 'sub_i32'
 ; CHECK: estimated cost of 1 for {{.*}} sub i32
-define void @sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
+define amdgpu_kernel void @sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
 %vec = load i32, i32 addrspace(1)* %vaddr
 %sub = sub i32 %vec, %b
 store i32 %sub, i32 addrspace(1)* %out
@@ -111,7 +111,7 @@ define void @sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #
 ; CHECK: 'sub_i64'
 ; CHECK: estimated cost of 2 for {{.*}} sub i64
-define void @sub_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 {
+define amdgpu_kernel void @sub_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 {
 %vec = load i64, i64 addrspace(1)* %vaddr
 %sub = sub i64 %vec, %b
 store i64 %sub, i64 addrspace(1)* %out
@@ -119,7 +119,7 @@ define void @sub_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #
 }
 ; CHECK: 'sub_i16'
 ; CHECK: estimated cost of 1 for {{.*}} sub i16
-define void @sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %vaddr, i16 %b) #0 {
+define amdgpu_kernel void @sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %vaddr, i16 %b) #0 {
 %vec = load i16, i16 addrspace(1)* %vaddr
 %sub = sub i16 %vec, %b
 store i16 %sub, i16 addrspace(1)* %out
@@ -128,7 +128,7 @@ define void @sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %vaddr, i16 %b) #
 ; CHECK: 'sub_v2i16'
 ; CHECK: estimated cost of 2 for {{.*}} sub <2 x i16>
-define void @sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, <2 x i16> %b) #0 {
+define amdgpu_kernel void @sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, <2 x i16> %b) #0 {
 %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr
 %sub = sub <2 x i16> %vec, %b
 store <2 x i16> %sub, <2 x i16> addrspace(1)* %out

View File

@@ -2,7 +2,7 @@
 ; CHECK: 'or_i32'
 ; CHECK: estimated cost of 1 for {{.*}} or i32
-define void @or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
+define amdgpu_kernel void @or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
 %vec = load i32, i32 addrspace(1)* %vaddr
 %or = or i32 %vec, %b
 store i32 %or, i32 addrspace(1)* %out
@@ -11,7 +11,7 @@ define void @or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0
 ; CHECK: 'or_i64'
 ; CHECK: estimated cost of 2 for {{.*}} or i64
-define void @or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 {
+define amdgpu_kernel void @or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 {
 %vec = load i64, i64 addrspace(1)* %vaddr
 %or = or i64 %vec, %b
 store i64 %or, i64 addrspace(1)* %out
@@ -20,7 +20,7 @@ define void @or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0
 ; CHECK: 'xor_i32'
 ; CHECK: estimated cost of 1 for {{.*}} xor i32
-define void @xor_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
+define amdgpu_kernel void @xor_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
 %vec = load i32, i32 addrspace(1)* %vaddr
 %or = xor i32 %vec, %b
 store i32 %or, i32 addrspace(1)* %out
@@ -29,7 +29,7 @@ define void @xor_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #
 ; CHECK: 'xor_i64'
 ; CHECK: estimated cost of 2 for {{.*}} xor i64
-define void @xor_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 {
+define amdgpu_kernel void @xor_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 {
 %vec = load i64, i64 addrspace(1)* %vaddr
 %or = xor i64 %vec, %b
 store i64 %or, i64 addrspace(1)* %out
@@ -39,7 +39,7 @@ define void @xor_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #
 ; CHECK: 'and_i32'
 ; CHECK: estimated cost of 1 for {{.*}} and i32
-define void @and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
+define amdgpu_kernel void @and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
 %vec = load i32, i32 addrspace(1)* %vaddr
 %or = and i32 %vec, %b
 store i32 %or, i32 addrspace(1)* %out
@@ -48,7 +48,7 @@ define void @and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #
 ; CHECK: 'and_i64'
 ; CHECK: estimated cost of 2 for {{.*}} and i64
-define void @and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 {
+define amdgpu_kernel void @and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 {
 %vec = load i64, i64 addrspace(1)* %vaddr
 %or = and i64 %vec, %b
 store i64 %or, i64 addrspace(1)* %out

View File

@@ -4,7 +4,7 @@
 ; CHECK: estimated cost of 10 for instruction: br i1
 ; CHECK: estimated cost of 10 for instruction: br label
 ; CHECK: estimated cost of 10 for instruction: ret void
-define void @test_br_cost(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
+define amdgpu_kernel void @test_br_cost(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
 bb0:
 br i1 undef, label %bb1, label %bb2
@@ -21,7 +21,7 @@ bb2:
 ; CHECK: 'test_switch_cost'
 ; CHECK: Unknown cost for instruction: switch
-define void @test_switch_cost(i32 %a) #0 {
+define amdgpu_kernel void @test_switch_cost(i32 %a) #0 {
 entry:
 switch i32 %a, label %default [
 i32 0, label %case0

View File

@@ -2,7 +2,7 @@
 ; CHECK: 'extractelement_v2i32'
 ; CHECK: estimated cost of 0 for {{.*}} extractelement <2 x i32>
-define void @extractelement_v2i32(i32 addrspace(1)* %out, <2 x i32> addrspace(1)* %vaddr) {
+define amdgpu_kernel void @extractelement_v2i32(i32 addrspace(1)* %out, <2 x i32> addrspace(1)* %vaddr) {
 %vec = load <2 x i32>, <2 x i32> addrspace(1)* %vaddr
 %elt = extractelement <2 x i32> %vec, i32 1
 store i32 %elt, i32 addrspace(1)* %out
@@ -11,7 +11,7 @@ define void @extractelement_v2i32(i32 addrspace(1)* %out, <2 x i32> addrspace(1)
 ; CHECK: 'extractelement_v2f32'
 ; CHECK: estimated cost of 0 for {{.*}} extractelement <2 x float>
-define void @extractelement_v2f32(float addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr) {
+define amdgpu_kernel void @extractelement_v2f32(float addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr) {
 %vec = load <2 x float>, <2 x float> addrspace(1)* %vaddr
 %elt = extractelement <2 x float> %vec, i32 1
 store float %elt, float addrspace(1)* %out
@@ -20,7 +20,7 @@ define void @extractelement_v2f32(float addrspace(1)* %out, <2 x float> addrspac
 ; CHECK: 'extractelement_v3i32'
 ; CHECK: estimated cost of 0 for {{.*}} extractelement <3 x i32>
-define void @extractelement_v3i32(i32 addrspace(1)* %out, <3 x i32> addrspace(1)* %vaddr) {
+define amdgpu_kernel void @extractelement_v3i32(i32 addrspace(1)* %out, <3 x i32> addrspace(1)* %vaddr) {
 %vec = load <3 x i32>, <3 x i32> addrspace(1)* %vaddr
 %elt = extractelement <3 x i32> %vec, i32 1
 store i32 %elt, i32 addrspace(1)* %out
@@ -29,7 +29,7 @@ define void @extractelement_v3i32(i32 addrspace(1)* %out, <3 x i32> addrspace(1)
 ; CHECK: 'extractelement_v4i32'
 ; CHECK: estimated cost of 0 for {{.*}} extractelement <4 x i32>
-define void @extractelement_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %vaddr) {
+define amdgpu_kernel void @extractelement_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %vaddr) {
 %vec = load <4 x i32>, <4 x i32> addrspace(1)* %vaddr
 %elt = extractelement <4 x i32> %vec, i32 1
 store i32 %elt, i32 addrspace(1)* %out
@@ -38,7 +38,7 @@ define void @extractelement_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)
 ; CHECK: 'extractelement_v8i32'
 ; CHECK: estimated cost of 0 for {{.*}} extractelement <8 x i32>
-define void @extractelement_v8i32(i32 addrspace(1)* %out, <8 x i32> addrspace(1)* %vaddr) {
+define amdgpu_kernel void @extractelement_v8i32(i32 addrspace(1)* %out, <8 x i32> addrspace(1)* %vaddr) {
 %vec = load <8 x i32>, <8 x i32> addrspace(1)* %vaddr
 %elt = extractelement <8 x i32> %vec, i32 1
 store i32 %elt, i32 addrspace(1)* %out
@@ -48,7 +48,7 @@ define void @extractelement_v8i32(i32 addrspace(1)* %out, <8 x i32> addrspace(1)
 ; FIXME: Should be non-0
 ; CHECK: 'extractelement_v8i32_dynindex'
 ; CHECK: estimated cost of 2 for {{.*}} extractelement <8 x i32>
-define void @extractelement_v8i32_dynindex(i32 addrspace(1)* %out, <8 x i32> addrspace(1)* %vaddr, i32 %idx) {
+define amdgpu_kernel void @extractelement_v8i32_dynindex(i32 addrspace(1)* %out, <8 x i32> addrspace(1)* %vaddr, i32 %idx) {
 %vec = load <8 x i32>, <8 x i32> addrspace(1)* %vaddr
 %elt = extractelement <8 x i32> %vec, i32 %idx
 store i32 %elt, i32 addrspace(1)* %out
@@ -57,7 +57,7 @@ define void @extractelement_v8i32_dynindex(i32 addrspace(1)* %out, <8 x i32> add
 ; CHECK: 'extractelement_v2i64'
 ; CHECK: estimated cost of 0 for {{.*}} extractelement <2 x i64>
-define void @extractelement_v2i64(i64 addrspace(1)* %out, <2 x i64> addrspace(1)* %vaddr) {
+define amdgpu_kernel void @extractelement_v2i64(i64 addrspace(1)* %out, <2 x i64> addrspace(1)* %vaddr) {
 %vec = load <2 x i64>, <2 x i64> addrspace(1)* %vaddr
 %elt = extractelement <2 x i64> %vec, i64 1
 store i64 %elt, i64 addrspace(1)* %out
@@ -66,7 +66,7 @@ define void @extractelement_v2i64(i64 addrspace(1)* %out, <2 x i64> addrspace(1)
 ; CHECK: 'extractelement_v3i64'
 ; CHECK: estimated cost of 0 for {{.*}} extractelement <3 x i64>
-define void @extractelement_v3i64(i64 addrspace(1)* %out, <3 x i64> addrspace(1)* %vaddr) {
+define amdgpu_kernel void @extractelement_v3i64(i64 addrspace(1)* %out, <3 x i64> addrspace(1)* %vaddr) {
 %vec = load <3 x i64>, <3 x i64> addrspace(1)* %vaddr
 %elt = extractelement <3 x i64> %vec, i64 1
 store i64 %elt, i64 addrspace(1)* %out
@@ -75,7 +75,7 @@ define void @extractelement_v3i64(i64 addrspace(1)* %out, <3 x i64> addrspace(1)
 ; CHECK: 'extractelement_v4i64'
 ; CHECK: estimated cost of 0 for {{.*}} extractelement <4 x i64>
-define void @extractelement_v4i64(i64 addrspace(1)* %out, <4 x i64> addrspace(1)* %vaddr) {
+define amdgpu_kernel void @extractelement_v4i64(i64 addrspace(1)* %out, <4 x i64> addrspace(1)* %vaddr) {
 %vec = load <4 x i64>, <4 x i64> addrspace(1)* %vaddr
 %elt = extractelement <4 x i64> %vec, i64 1
 store i64 %elt, i64 addrspace(1)* %out
@@ -84,7 +84,7 @@ define void @extractelement_v4i64(i64 addrspace(1)* %out, <4 x i64> addrspace(1)
 ; CHECK: 'extractelement_v8i64'
 ; CHECK: estimated cost of 0 for {{.*}} extractelement <8 x i64>
-define void @extractelement_v8i64(i64 addrspace(1)* %out, <8 x i64> addrspace(1)* %vaddr) {
+define amdgpu_kernel void @extractelement_v8i64(i64 addrspace(1)* %out, <8 x i64> addrspace(1)* %vaddr) {
 %vec = load <8 x i64>, <8 x i64> addrspace(1)* %vaddr
 %elt = extractelement <8 x i64> %vec, i64 1
 store i64 %elt, i64 addrspace(1)* %out
@@ -93,7 +93,7 @@ define void @extractelement_v8i64(i64 addrspace(1)* %out, <8 x i64> addrspace(1)
 ; CHECK: 'extractelement_v4i8'
 ; CHECK: estimated cost of 0 for {{.*}} extractelement <4 x i8>
-define void @extractelement_v4i8(i8 addrspace(1)* %out, <4 x i8> addrspace(1)* %vaddr) {
+define amdgpu_kernel void @extractelement_v4i8(i8 addrspace(1)* %out, <4 x i8> addrspace(1)* %vaddr) {
 %vec = load <4 x i8>, <4 x i8> addrspace(1)* %vaddr
 %elt = extractelement <4 x i8> %vec, i8 1
 store i8 %elt, i8 addrspace(1)* %out
@@ -102,7 +102,7 @@ define void @extractelement_v4i8(i8 addrspace(1)* %out, <4 x i8> addrspace(1)* %
 ; CHECK: 'extractelement_v2i16'
 ; CHECK: estimated cost of 0 for {{.*}} extractelement <2 x i16>
-define void @extractelement_v2i16(i16 addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) {
+define amdgpu_kernel void @extractelement_v2i16(i16 addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) {
 %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr
 %elt = extractelement <2 x i16> %vec, i16 1
 store i16 %elt, i16 addrspace(1)* %out

View File

@@ -2,7 +2,7 @@
 ; CHECK: 'fabs_f32'
 ; CHECK: estimated cost of 0 for {{.*}} call float @llvm.fabs.f32
-define void @fabs_f32(float addrspace(1)* %out, float addrspace(1)* %vaddr) #0 {
+define amdgpu_kernel void @fabs_f32(float addrspace(1)* %out, float addrspace(1)* %vaddr) #0 {
 %vec = load float, float addrspace(1)* %vaddr
 %fabs = call float @llvm.fabs.f32(float %vec) #1
 store float %fabs, float addrspace(1)* %out
@@ -11,7 +11,7 @@ define void @fabs_f32(float addrspace(1)* %out, float addrspace(1)* %vaddr) #0 {
 ; CHECK: 'fabs_v2f32'
 ; CHECK: estimated cost of 0 for {{.*}} call <2 x float> @llvm.fabs.v2f32
-define void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr) #0 {
+define amdgpu_kernel void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr) #0 {
 %vec = load <2 x float>, <2 x float> addrspace(1)* %vaddr
 %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %vec) #1
 store <2 x float> %fabs, <2 x float> addrspace(1)* %out
@@ -20,7 +20,7 @@ define void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)
 ; CHECK: 'fabs_v3f32'
 ; CHECK: estimated cost of 0 for {{.*}} call <3 x float> @llvm.fabs.v3f32
-define void @fabs_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %vaddr) #0 {
+define amdgpu_kernel void @fabs_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %vaddr) #0 {
 %vec = load <3 x float>, <3 x float> addrspace(1)* %vaddr
 %fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %vec) #1
 store <3 x float> %fabs, <3 x float> addrspace(1)* %out
@@ -29,7 +29,7 @@ define void @fabs_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)
 ; CHECK: 'fabs_f64'
 ; CHECK: estimated cost of 0 for {{.*}} call double @llvm.fabs.f64
-define void @fabs_f64(double addrspace(1)* %out, double addrspace(1)* %vaddr) #0 {
+define amdgpu_kernel void @fabs_f64(double addrspace(1)* %out, double addrspace(1)* %vaddr) #0 {
 %vec = load double, double addrspace(1)* %vaddr
 %fabs = call double @llvm.fabs.f64(double %vec) #1
 store double %fabs, double addrspace(1)* %out
@@ -38,7 +38,7 @@ define void @fabs_f64(double addrspace(1)* %out, double addrspace(1)* %vaddr) #0
 ; CHECK: 'fabs_v2f64'
 ; CHECK: estimated cost of 0 for {{.*}} call <2 x double> @llvm.fabs.v2f64
-define void @fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %vaddr) #0 {
+define amdgpu_kernel void @fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %vaddr) #0 {
 %vec = load <2 x double>, <2 x double> addrspace(1)* %vaddr
 %fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %vec) #1
 store <2 x double> %fabs, <2 x double> addrspace(1)* %out
@@ -47,7 +47,7 @@ define void @fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(
 ; CHECK: 'fabs_v3f64'
 ; CHECK: estimated cost of 0 for {{.*}} call <3 x double> @llvm.fabs.v3f64
-define void @fabs_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(1)* %vaddr) #0 {
+define amdgpu_kernel void @fabs_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(1)* %vaddr) #0 {
 %vec = load <3 x double>, <3 x double> addrspace(1)* %vaddr
 %fabs = call <3 x double> @llvm.fabs.v3f64(<3 x double> %vec) #1
 store <3 x double> %fabs, <3 x double> addrspace(1)* %out
@@ -56,7 +56,7 @@ define void @fabs_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(
 ; CHECK: 'fabs_f16'
 ; CHECK: estimated cost of 0 for {{.*}} call half @llvm.fabs.f16
-define void @fabs_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr) #0 {
+define amdgpu_kernel void @fabs_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr) #0 {
 %vec = load half, half addrspace(1)* %vaddr
 %fabs = call half @llvm.fabs.f16(half %vec) #1
 store half %fabs, half addrspace(1)* %out
@@ -65,7 +65,7 @@ define void @fabs_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr) #0 {
 ; CHECK: 'fabs_v2f16'
 ; CHECK: estimated cost of 0 for {{.*}} call <2 x half> @llvm.fabs.v2f16
-define void @fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr) #0 {
+define amdgpu_kernel void @fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr) #0 {
 %vec = load <2 x half>, <2 x half> addrspace(1)* %vaddr
 %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %vec) #1
 store <2 x half> %fabs, <2 x half> addrspace(1)* %out
@@ -74,7 +74,7 @@ define void @fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)*
 ; CHECK: 'fabs_v3f16'
 ; CHECK: estimated cost of 0 for {{.*}} call <3 x half> @llvm.fabs.v3f16
-define void @fabs_v3f16(<3 x half> addrspace(1)* %out, <3 x half> addrspace(1)* %vaddr) #0 {
+define amdgpu_kernel void @fabs_v3f16(<3 x half> addrspace(1)* %out, <3 x half> addrspace(1)* %vaddr) #0 {
 %vec = load <3 x half>, <3 x half> addrspace(1)* %vaddr
 %fabs = call <3 x half> @llvm.fabs.v3f16(<3 x half> %vec) #1
 store <3 x half> %fabs, <3 x half> addrspace(1)* %out

View File

@@ -3,7 +3,7 @@
 ; ALL: 'fadd_f32'
 ; ALL: estimated cost of 1 for {{.*}} fadd float
-define void @fadd_f32(float addrspace(1)* %out, float addrspace(1)* %vaddr, float %b) #0 {
+define amdgpu_kernel void @fadd_f32(float addrspace(1)* %out, float addrspace(1)* %vaddr, float %b) #0 {
 %vec = load float, float addrspace(1)* %vaddr
 %add = fadd float %vec, %b
 store float %add, float addrspace(1)* %out
@@ -12,7 +12,7 @@ define void @fadd_f32(float addrspace(1)* %out, float addrspace(1)* %vaddr, floa
 ; ALL: 'fadd_v2f32'
 ; ALL: estimated cost of 2 for {{.*}} fadd <2 x float>
-define void @fadd_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr, <2 x float> %b) #0 {
+define amdgpu_kernel void @fadd_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr, <2 x float> %b) #0 {
 %vec = load <2 x float>, <2 x float> addrspace(1)* %vaddr
 %add = fadd <2 x float> %vec, %b
 store <2 x float> %add, <2 x float> addrspace(1)* %out
@@ -21,7 +21,7 @@ define void @fadd_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)
 ; ALL: 'fadd_v3f32'
 ; ALL: estimated cost of 3 for {{.*}} fadd <3 x float>
-define void @fadd_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %vaddr, <3 x float> %b) #0 {
+define amdgpu_kernel void @fadd_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %vaddr, <3 x float> %b) #0 {
 %vec = load <3 x float>, <3 x float> addrspace(1)* %vaddr
 %add = fadd <3 x float> %vec, %b
 store <3 x float> %add, <3 x float> addrspace(1)* %out
@@ -31,7 +31,7 @@ define void @fadd_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)
 ; ALL: 'fadd_f64'
 ; FASTF64: estimated cost of 2 for {{.*}} fadd double
 ; SLOWF64: estimated cost of 3 for {{.*}} fadd double
-define void @fadd_f64(double addrspace(1)* %out, double addrspace(1)* %vaddr, double %b) #0 {
+define amdgpu_kernel void @fadd_f64(double addrspace(1)* %out, double addrspace(1)* %vaddr, double %b) #0 {
 %vec = load double, double addrspace(1)* %vaddr
 %add = fadd double %vec, %b
 store double %add, double addrspace(1)* %out
@@ -41,7 +41,7 @@ define void @fadd_f64(double addrspace(1)* %out, double addrspace(1)* %vaddr, do
 ; ALL: 'fadd_v2f64'
 ; FASTF64: estimated cost of 4 for {{.*}} fadd <2 x double>
 ; SLOWF64: estimated cost of 6 for {{.*}} fadd <2 x double>
-define void @fadd_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %vaddr, <2 x double> %b) #0 {
+define amdgpu_kernel void @fadd_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %vaddr, <2 x double> %b) #0 {
 %vec = load <2 x double>, <2 x double> addrspace(1)* %vaddr
 %add = fadd <2 x double> %vec, %b
 store <2 x double> %add, <2 x double> addrspace(1)* %out
@@ -51,7 +51,7 @@ define void @fadd_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(
 ; ALL: 'fadd_v3f64'
 ; FASTF64: estimated cost of 6 for {{.*}} fadd <3 x double>
 ; SLOWF64: estimated cost of 9 for {{.*}} fadd <3 x double>
-define void @fadd_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(1)* %vaddr, <3 x double> %b) #0 {
+define amdgpu_kernel void @fadd_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(1)* %vaddr, <3 x double> %b) #0 {
 %vec = load <3 x double>, <3 x double> addrspace(1)* %vaddr
 %add = fadd <3 x double> %vec, %b
 store <3 x double> %add, <3 x double> addrspace(1)* %out
@@ -60,7 +60,7 @@ define void @fadd_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(
 ; ALL 'fadd_f16'
 ; ALL estimated cost of 1 for {{.*}} fadd half
-define void @fadd_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr, half %b) #0 {
+define amdgpu_kernel void @fadd_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr, half %b) #0 {
 %vec = load half, half addrspace(1)* %vaddr
 %add = fadd half %vec, %b
 store half %add, half addrspace(1)* %out
@@ -69,7 +69,7 @@ define void @fadd_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr, half %
 ; ALL 'fadd_v2f16'
 ; ALL estimated cost of 2 for {{.*}} fadd <2 x half>
-define void @fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr, <2 x half> %b) #0 {
+define amdgpu_kernel void @fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr, <2 x half> %b) #0 {
 %vec = load <2 x half>, <2 x half> addrspace(1)* %vaddr
 %add = fadd <2 x half> %vec, %b
 store <2 x half> %add, <2 x half> addrspace(1)* %out
@@ -78,7 +78,7 @@ define void @fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)*
 ; ALL 'fadd_v4f16'
 ; ALL estimated cost of 4 for {{.*}} fadd <4 x half>
-define void @fadd_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %vaddr, <4 x half> %b) #0 {
+define amdgpu_kernel void @fadd_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %vaddr, <4 x half> %b) #0 {
 %vec = load <4 x half>, <4 x half> addrspace(1)* %vaddr
 %add = fadd <4 x half> %vec, %b
 store <4 x half> %add, <4 x half> addrspace(1)* %out

View File

@@ -5,7 +5,7 @@
 ; CHECK: 'fdiv_f32'
 ; ALL: estimated cost of 10 for {{.*}} fdiv float
-define void @fdiv_f32(float addrspace(1)* %out, float addrspace(1)* %vaddr, float %b) #0 {
+define amdgpu_kernel void @fdiv_f32(float addrspace(1)* %out, float addrspace(1)* %vaddr, float %b) #0 {
 %vec = load float, float addrspace(1)* %vaddr
 %add = fdiv float %vec, %b
 store float %add, float addrspace(1)* %out
@@ -14,7 +14,7 @@ define void @fdiv_f32(float addrspace(1)* %out, float addrspace(1)* %vaddr, floa
 ; ALL: 'fdiv_v2f32'
 ; ALL: estimated cost of 20 for {{.*}} fdiv <2 x float>
-define void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr, <2 x float> %b) #0 {
+define amdgpu_kernel void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr, <2 x float> %b) #0 {
 %vec = load <2 x float>, <2 x float> addrspace(1)* %vaddr
 %add = fdiv <2 x float> %vec, %b
 store <2 x float> %add, <2 x float> addrspace(1)* %out
@@ -23,7 +23,7 @@ define void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)
 ; ALL: 'fdiv_v3f32'
 ; ALL: estimated cost of 30 for {{.*}} fdiv <3 x float>
-define void @fdiv_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %vaddr, <3 x float> %b) #0 {
+define amdgpu_kernel void @fdiv_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %vaddr, <3 x float> %b) #0 {
 %vec = load <3 x float>, <3 x float> addrspace(1)* %vaddr
 %add = fdiv <3 x float> %vec, %b
 store <3 x float> %add, <3 x float> addrspace(1)* %out
@@ -35,7 +35,7 @@ define void @fdiv_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)
 ; CISLOWF64: estimated cost of 33 for {{.*}} fdiv double
 ; SIFASTF64: estimated cost of 32 for {{.*}} fdiv double
 ; SISLOWF64: estimated cost of 36 for {{.*}} fdiv double
-define void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %vaddr, double %b) #0 {
+define amdgpu_kernel void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %vaddr, double %b) #0 {
 %vec = load double, double addrspace(1)* %vaddr
 %add = fdiv double %vec, %b
 store double %add, double addrspace(1)* %out
@@ -47,7 +47,7 @@ define void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %vaddr, do
 ; CISLOWF64: estimated cost of 66 for {{.*}} fdiv <2 x double>
 ; SIFASTF64: estimated cost of 64 for {{.*}} fdiv <2 x double>
 ; SISLOWF64: estimated cost of 72 for {{.*}} fdiv <2 x double>
-define void @fdiv_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %vaddr, <2 x double> %b) #0 {
+define amdgpu_kernel void @fdiv_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %vaddr, <2 x double> %b) #0 {
 %vec = load <2 x double>, <2 x double> addrspace(1)* %vaddr
 %add = fdiv <2 x double> %vec, %b
 store <2 x double> %add, <2 x double> addrspace(1)* %out
@@ -59,7 +59,7 @@ define void @fdiv_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(
 ; CISLOWF64: estimated cost of 99 for {{.*}} fdiv <3 x double>
 ; SIFASTF64: estimated cost of 96 for {{.*}} fdiv <3 x double>
 ; SISLOWF64: estimated cost of 108 for {{.*}} fdiv <3 x double>
-define void @fdiv_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(1)* %vaddr, <3 x double> %b) #0 {
+define amdgpu_kernel void @fdiv_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(1)* %vaddr, <3 x double> %b) #0 {
 %vec = load <3 x double>, <3 x double> addrspace(1)* %vaddr
 %add = fdiv <3 x double> %vec, %b
 store <3 x double> %add, <3 x double> addrspace(1)* %out
@@ -68,7 +68,7 @@ define void @fdiv_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(
 ; ALL: 'fdiv_f16'
 ; ALL: estimated cost of 10 for {{.*}} fdiv half
-define void @fdiv_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr, half %b) #0 {
+define amdgpu_kernel void @fdiv_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr, half %b) #0 {
 %vec = load half, half addrspace(1)* %vaddr
 %add = fdiv half %vec, %b
 store half %add, half addrspace(1)* %out
@@ -77,7 +77,7 @@ define void @fdiv_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr, half %
 ; ALL: 'fdiv_v2f16'
 ; ALL: estimated cost of 20 for {{.*}} fdiv <2 x half>
-define void @fdiv_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr, <2 x half> %b) #0 {
+define amdgpu_kernel void @fdiv_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr, <2 x half> %b) #0 {
 %vec = load <2 x half>, <2 x half> addrspace(1)* %vaddr
 %add = fdiv <2 x half> %vec, %b
 store <2 x half> %add, <2 x half> addrspace(1)* %out
@@ -86,7 +86,7 @@ define void @fdiv_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)*
 ; ALL: 'fdiv_v4f16'
 ; ALL: estimated cost of 40 for {{.*}} fdiv <4 x half>
-define void @fdiv_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %vaddr, <4 x half> %b) #0 {
+define amdgpu_kernel void @fdiv_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %vaddr, <4 x half> %b) #0 {
 %vec = load <4 x half>, <4 x half> addrspace(1)* %vaddr
 %add = fdiv <4 x half> %vec, %b
 store <4 x half> %add, <4 x half> addrspace(1)* %out

View File

@@ -3,7 +3,7 @@
 ; ALL: 'fmul_f32'
 ; ALL: estimated cost of 1 for {{.*}} fmul float
-define void @fmul_f32(float addrspace(1)* %out, float addrspace(1)* %vaddr, float %b) #0 {
+define amdgpu_kernel void @fmul_f32(float addrspace(1)* %out, float addrspace(1)* %vaddr, float %b) #0 {
 %vec = load float, float addrspace(1)* %vaddr
 %add = fmul float %vec, %b
 store float %add, float addrspace(1)* %out
@@ -12,7 +12,7 @@ define void @fmul_f32(float addrspace(1)* %out, float addrspace(1)* %vaddr, floa
 ; ALL: 'fmul_v2f32'
 ; ALL: estimated cost of 2 for {{.*}} fmul <2 x float>
-define void @fmul_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr, <2 x float> %b) #0 {
+define amdgpu_kernel void @fmul_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr, <2 x float> %b) #0 {
 %vec = load <2 x float>, <2 x float> addrspace(1)* %vaddr
 %add = fmul <2 x float> %vec, %b
 store <2 x float> %add, <2 x float> addrspace(1)* %out
@@ -21,7 +21,7 @@ define void @fmul_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)
 ; ALL: 'fmul_v3f32'
 ; ALL: estimated cost of 3 for {{.*}} fmul <3 x float>
-define void @fmul_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %vaddr, <3 x float> %b) #0 {
+define amdgpu_kernel void @fmul_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %vaddr, <3 x float> %b) #0 {
 %vec = load <3 x float>, <3 x float> addrspace(1)* %vaddr
 %add = fmul <3 x float> %vec, %b
 store <3 x float> %add, <3 x float> addrspace(1)* %out
@@ -31,7 +31,7 @@ define void @fmul_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)
 ; ALL: 'fmul_f64'
 ; FASTF64: estimated cost of 2 for {{.*}} fmul double
 ; SLOWF64: estimated cost of 3 for {{.*}} fmul double
-define void @fmul_f64(double addrspace(1)* %out, double addrspace(1)* %vaddr, double %b) #0 {
+define amdgpu_kernel void @fmul_f64(double addrspace(1)* %out, double addrspace(1)* %vaddr, double %b) #0 {
 %vec = load double, double addrspace(1)* %vaddr
 %add = fmul double %vec, %b
 store double %add, double addrspace(1)* %out
@@ -41,7 +41,7 @@ define void @fmul_f64(double addrspace(1)* %out, double addrspace(1)* %vaddr, do
 ; ALL: 'fmul_v2f64'
 ; FASTF64: estimated cost of 4 for {{.*}} fmul <2 x double>
 ; SLOWF64: estimated cost of 6 for {{.*}} fmul <2 x double>
-define void @fmul_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %vaddr, <2 x double> %b) #0 {
+define amdgpu_kernel void @fmul_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %vaddr, <2 x double> %b) #0 {
 %vec = load <2 x double>, <2 x double> addrspace(1)* %vaddr
 %add = fmul <2 x double> %vec, %b
 store <2 x double> %add, <2 x double> addrspace(1)* %out
@@ -51,7 +51,7 @@ define void @fmul_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(
 ; ALL: 'fmul_v3f64'
 ; FASTF64: estimated cost of 6 for {{.*}} fmul <3 x double>
 ; SLOWF64: estimated cost of 9 for {{.*}} fmul <3 x double>
-define void @fmul_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(1)* %vaddr, <3 x double> %b) #0 {
+define amdgpu_kernel void @fmul_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(1)* %vaddr, <3 x double> %b) #0 {
 %vec = load <3 x double>, <3 x double> addrspace(1)* %vaddr
 %add = fmul <3 x double> %vec, %b
 store <3 x double> %add, <3 x double> addrspace(1)* %out
@@ -60,7 +60,7 @@ define void @fmul_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(
 ; ALL 'fmul_f16'
 ; ALL estimated cost of 1 for {{.*}} fmul half
-define void @fmul_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr, half %b) #0 {
+define amdgpu_kernel void @fmul_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr, half %b) #0 {
 %vec = load half, half addrspace(1)* %vaddr
 %add = fmul half %vec, %b
 store half %add, half addrspace(1)* %out
@@ -69,7 +69,7 @@ define void @fmul_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr, half %
 ; ALL 'fmul_v2f16'
 ; ALL estimated cost of 2 for {{.*}} fmul <2 x half>
-define void @fmul_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr, <2 x half> %b) #0 {
+define amdgpu_kernel void @fmul_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr, <2 x half> %b) #0 {
 %vec = load <2 x half>, <2 x half> addrspace(1)* %vaddr
 %add = fmul <2 x half> %vec, %b
 store <2 x half> %add, <2 x half> addrspace(1)* %out
@@ -78,7 +78,7 @@ define void @fmul_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)*
 ; ALL 'fmul_v4f16'
 ; ALL estimated cost of 4 for {{.*}} fmul <4 x half>
-define void @fmul_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %vaddr, <4 x half> %b) #0 {
+define amdgpu_kernel void @fmul_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %vaddr, <4 x half> %b) #0 {
 %vec = load <4 x half>, <4 x half> addrspace(1)* %vaddr
 %add = fmul <4 x half> %vec, %b
 store <4 x half> %add, <4 x half> addrspace(1)* %out

View File

@@ -3,7 +3,7 @@
 ; ALL: 'fsub_f32'
 ; ALL: estimated cost of 1 for {{.*}} fsub float
-define void @fsub_f32(float addrspace(1)* %out, float addrspace(1)* %vaddr, float %b) #0 {
+define amdgpu_kernel void @fsub_f32(float addrspace(1)* %out, float addrspace(1)* %vaddr, float %b) #0 {
 %vec = load float, float addrspace(1)* %vaddr
 %add = fsub float %vec, %b
 store float %add, float addrspace(1)* %out
@@ -12,7 +12,7 @@ define void @fsub_f32(float addrspace(1)* %out, float addrspace(1)* %vaddr, floa
 ; ALL: 'fsub_v2f32'
 ; ALL: estimated cost of 2 for {{.*}} fsub <2 x float>
-define void @fsub_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr, <2 x float> %b) #0 {
+define amdgpu_kernel void @fsub_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr, <2 x float> %b) #0 {
 %vec = load <2 x float>, <2 x float> addrspace(1)* %vaddr
 %add = fsub <2 x float> %vec, %b
 store <2 x float> %add, <2 x float> addrspace(1)* %out
@@ -21,7 +21,7 @@ define void @fsub_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)
 ; ALL: 'fsub_v3f32'
 ; ALL: estimated cost of 3 for {{.*}} fsub <3 x float>
-define void @fsub_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %vaddr, <3 x float> %b) #0 {
+define amdgpu_kernel void @fsub_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %vaddr, <3 x float> %b) #0 {
 %vec = load <3 x float>, <3 x float> addrspace(1)* %vaddr
 %add = fsub <3 x float> %vec, %b
 store <3 x float> %add, <3 x float> addrspace(1)* %out
@@ -31,7 +31,7 @@ define void @fsub_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)
 ; ALL: 'fsub_f64'
 ; FASTF64: estimated cost of 2 for {{.*}} fsub double
 ; SLOWF64: estimated cost of 3 for {{.*}} fsub double
-define void @fsub_f64(double addrspace(1)* %out, double addrspace(1)* %vaddr, double %b) #0 {
+define amdgpu_kernel void @fsub_f64(double addrspace(1)* %out, double addrspace(1)* %vaddr, double %b) #0 {
 %vec = load double, double addrspace(1)* %vaddr
 %add = fsub double %vec, %b
 store double %add, double addrspace(1)* %out
@@ -41,7 +41,7 @@ define void @fsub_f64(double addrspace(1)* %out, double addrspace(1)* %vaddr, do
 ; ALL: 'fsub_v2f64'
 ; FASTF64: estimated cost of 4 for {{.*}} fsub <2 x double>
 ; SLOWF64: estimated cost of 6 for {{.*}} fsub <2 x double>
-define void @fsub_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %vaddr, <2 x double> %b) #0 {
+define amdgpu_kernel void @fsub_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %vaddr, <2 x double> %b) #0 {
 %vec = load <2 x double>, <2 x double> addrspace(1)* %vaddr
 %add = fsub <2 x double> %vec, %b
 store <2 x double> %add, <2 x double> addrspace(1)* %out
@@ -51,7 +51,7 @@ define void @fsub_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(
 ; ALL: 'fsub_v3f64'
 ; FASTF64: estimated cost of 6 for {{.*}} fsub <3 x double>
 ; SLOWF64: estimated cost of 9 for {{.*}} fsub <3 x double>
-define void @fsub_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(1)* %vaddr, <3 x double> %b) #0 {
+define amdgpu_kernel void @fsub_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(1)* %vaddr, <3 x double> %b) #0 {
 %vec = load <3 x double>, <3 x double> addrspace(1)* %vaddr
 %add = fsub <3 x double> %vec, %b
 store <3 x double> %add, <3 x double> addrspace(1)* %out
@@ -60,7 +60,7 @@ define void @fsub_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(
 ; ALL: 'fsub_f16'
 ; ALL: estimated cost of 1 for {{.*}} fsub half
-define void @fsub_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr, half %b) #0 {
+define amdgpu_kernel void @fsub_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr, half %b) #0 {
 %vec = load half, half addrspace(1)* %vaddr
 %add = fsub half %vec, %b
 store half %add, half addrspace(1)* %out
@@ -69,7 +69,7 @@ define void @fsub_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr, half %
 ; ALL: 'fsub_v2f16'
 ; ALL: estimated cost of 2 for {{.*}} fsub <2 x half>
-define void @fsub_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr, <2 x half> %b) #0 {
+define amdgpu_kernel void @fsub_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr, <2 x half> %b) #0 {
 %vec = load <2 x half>, <2 x half> addrspace(1)* %vaddr
 %add = fsub <2 x half> %vec, %b
 store <2 x half> %add, <2 x half> addrspace(1)* %out
@@ -78,7 +78,7 @@ define void @fsub_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)*
 ; ALL: 'fsub_v4f16'
 ; ALL: estimated cost of 4 for {{.*}} fsub <4 x half>
-define void @fsub_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %vaddr, <4 x half> %b) #0 {
+define amdgpu_kernel void @fsub_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %vaddr, <4 x half> %b) #0 {
 %vec = load <4 x half>, <4 x half> addrspace(1)* %vaddr
 %add = fsub <4 x half> %vec, %b
 store <4 x half> %add, <4 x half> addrspace(1)* %out

View File

@@ -2,7 +2,7 @@
 ; CHECK: 'insertelement_v2i32'
 ; CHECK: estimated cost of 0 for {{.*}} insertelement <2 x i32>
-define void @insertelement_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %vaddr) {
+define amdgpu_kernel void @insertelement_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %vaddr) {
 %vec = load <2 x i32>, <2 x i32> addrspace(1)* %vaddr
 %insert = insertelement <2 x i32> %vec, i32 1, i32 123
 store <2 x i32> %insert, <2 x i32> addrspace(1)* %out
@@ -11,7 +11,7 @@ define void @insertelement_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspa
 ; CHECK: 'insertelement_v2i64'
 ; CHECK: estimated cost of 0 for {{.*}} insertelement <2 x i64>
-define void @insertelement_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %vaddr) {
+define amdgpu_kernel void @insertelement_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %vaddr) {
 %vec = load <2 x i64>, <2 x i64> addrspace(1)* %vaddr
 %insert = insertelement <2 x i64> %vec, i64 1, i64 123
 store <2 x i64> %insert, <2 x i64> addrspace(1)* %out
@@ -20,7 +20,7 @@ define void @insertelement_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspa
 ; CHECK: 'insertelement_v2i16'
 ; CHECK: estimated cost of 0 for {{.*}} insertelement <2 x i16>
-define void @insertelement_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) {
+define amdgpu_kernel void @insertelement_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) {
 %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr
 %insert = insertelement <2 x i16> %vec, i16 1, i16 123
 store <2 x i16> %insert, <2 x i16> addrspace(1)* %out
@@ -29,7 +29,7 @@ define void @insertelement_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspa
 ; CHECK: 'insertelement_v2i8'
 ; CHECK: estimated cost of 0 for {{.*}} insertelement <2 x i8>
-define void @insertelement_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %vaddr) {
+define amdgpu_kernel void @insertelement_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %vaddr) {
 %vec = load <2 x i8>, <2 x i8> addrspace(1)* %vaddr
 %insert = insertelement <2 x i8> %vec, i8 1, i8 123
 store <2 x i8> %insert, <2 x i8> addrspace(1)* %out

View File

@@ -2,7 +2,7 @@
 ; CHECK: 'mul_i32'
 ; CHECK: estimated cost of 3 for {{.*}} mul i32
-define void @mul_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
+define amdgpu_kernel void @mul_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
 %vec = load i32, i32 addrspace(1)* %vaddr
 %mul = mul i32 %vec, %b
 store i32 %mul, i32 addrspace(1)* %out
@@ -11,7 +11,7 @@ define void @mul_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #
 ; CHECK: 'mul_v2i32'
 ; CHECK: estimated cost of 6 for {{.*}} mul <2 x i32>
-define void @mul_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %vaddr, <2 x i32> %b) #0 {
+define amdgpu_kernel void @mul_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %vaddr, <2 x i32> %b) #0 {
 %vec = load <2 x i32>, <2 x i32> addrspace(1)* %vaddr
 %mul = mul <2 x i32> %vec, %b
 store <2 x i32> %mul, <2 x i32> addrspace(1)* %out
@@ -20,7 +20,7 @@ define void @mul_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %va
 ; CHECK: 'mul_v3i32'
 ; CHECK: estimated cost of 9 for {{.*}} mul <3 x i32>
-define void @mul_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %vaddr, <3 x i32> %b) #0 {
+define amdgpu_kernel void @mul_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %vaddr, <3 x i32> %b) #0 {
 %vec = load <3 x i32>, <3 x i32> addrspace(1)* %vaddr
 %mul = mul <3 x i32> %vec, %b
 store <3 x i32> %mul, <3 x i32> addrspace(1)* %out
@@ -29,7 +29,7 @@ define void @mul_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %va
 ; CHECK: 'mul_v4i32'
 ; CHECK: estimated cost of 12 for {{.*}} mul <4 x i32>
-define void @mul_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %vaddr, <4 x i32> %b) #0 {
+define amdgpu_kernel void @mul_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %vaddr, <4 x i32> %b) #0 {
 %vec = load <4 x i32>, <4 x i32> addrspace(1)* %vaddr
 %mul = mul <4 x i32> %vec, %b
 store <4 x i32> %mul, <4 x i32> addrspace(1)* %out
@@ -38,7 +38,7 @@ define void @mul_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %va
 ; CHECK: 'mul_i64'
 ; CHECK: estimated cost of 16 for {{.*}} mul i64
-define void @mul_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 {
+define amdgpu_kernel void @mul_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 {
 %vec = load i64, i64 addrspace(1)* %vaddr
 %mul = mul i64 %vec, %b
 store i64 %mul, i64 addrspace(1)* %out
@@ -47,7 +47,7 @@ define void @mul_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #
 ; CHECK: 'mul_v2i64'
 ; CHECK: estimated cost of 32 for {{.*}} mul <2 x i64>
-define void @mul_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %vaddr, <2 x i64> %b) #0 {
+define amdgpu_kernel void @mul_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %vaddr, <2 x i64> %b) #0 {
 %vec = load <2 x i64>, <2 x i64> addrspace(1)* %vaddr
 %mul = mul <2 x i64> %vec, %b
 store <2 x i64> %mul, <2 x i64> addrspace(1)* %out
@@ -56,7 +56,7 @@ define void @mul_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %va
 ; CHECK: 'mul_v3i64'
 ; CHECK: estimated cost of 48 for {{.*}} mul <3 x i64>
-define void @mul_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> addrspace(1)* %vaddr, <3 x i64> %b) #0 {
+define amdgpu_kernel void @mul_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> addrspace(1)* %vaddr, <3 x i64> %b) #0 {
 %vec = load <3 x i64>, <3 x i64> addrspace(1)* %vaddr
 %mul = mul <3 x i64> %vec, %b
 store <3 x i64> %mul, <3 x i64> addrspace(1)* %out
@@ -65,7 +65,7 @@ define void @mul_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> addrspace(1)* %va
 ; CHECK: 'mul_v4i64'
 ; CHECK: estimated cost of 64 for {{.*}} mul <4 x i64>
-define void @mul_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %vaddr, <4 x i64> %b) #0 {
+define amdgpu_kernel void @mul_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %vaddr, <4 x i64> %b) #0 {
 %vec = load <4 x i64>, <4 x i64> addrspace(1)* %vaddr
 %mul = mul <4 x i64> %vec, %b
 store <4 x i64> %mul, <4 x i64> addrspace(1)* %out
@@ -75,7 +75,7 @@ define void @mul_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %va
 ; CHECK: 'mul_v8i64'
 ; CHECK: estimated cost of 128 for {{.*}} mul <8 x i64>
-define void @mul_v8i64(<8 x i64> addrspace(1)* %out, <8 x i64> addrspace(1)* %vaddr, <8 x i64> %b) #0 {
+define amdgpu_kernel void @mul_v8i64(<8 x i64> addrspace(1)* %out, <8 x i64> addrspace(1)* %vaddr, <8 x i64> %b) #0 {
 %vec = load <8 x i64>, <8 x i64> addrspace(1)* %vaddr
 %mul = mul <8 x i64> %vec, %b
 store <8 x i64> %mul, <8 x i64> addrspace(1)* %out

View File

@@ -3,7 +3,7 @@
 ; ALL: 'shl_i32'
 ; ALL: estimated cost of 1 for {{.*}} shl i32
-define void @shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
+define amdgpu_kernel void @shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
 %vec = load i32, i32 addrspace(1)* %vaddr
 %or = shl i32 %vec, %b
 store i32 %or, i32 addrspace(1)* %out
@@ -13,7 +13,7 @@ define void @shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #
 ; ALL: 'shl_i64'
 ; FAST64: estimated cost of 2 for {{.*}} shl i64
 ; SLOW64: estimated cost of 3 for {{.*}} shl i64
-define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 {
+define amdgpu_kernel void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 {
 %vec = load i64, i64 addrspace(1)* %vaddr
 %or = shl i64 %vec, %b
 store i64 %or, i64 addrspace(1)* %out
@@ -22,7 +22,7 @@ define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #
 ; ALL: 'lshr_i32'
 ; ALL: estimated cost of 1 for {{.*}} lshr i32
-define void @lshr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
+define amdgpu_kernel void @lshr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
 %vec = load i32, i32 addrspace(1)* %vaddr
 %or = lshr i32 %vec, %b
 store i32 %or, i32 addrspace(1)* %out
@@ -32,7 +32,7 @@ define void @lshr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b)
 ; ALL: 'lshr_i64'
 ; FAST64: estimated cost of 2 for {{.*}} lshr i64
 ; SLOW64: estimated cost of 3 for {{.*}} lshr i64
-define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 {
+define amdgpu_kernel void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 {
 %vec = load i64, i64 addrspace(1)* %vaddr
 %or = lshr i64 %vec, %b
 store i64 %or, i64 addrspace(1)* %out
@@ -41,7 +41,7 @@ define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b)
 ; ALL: 'ashr_i32'
 ; ALL: estimated cost of 1 for {{.*}} ashr i32
-define void @ashr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
+define amdgpu_kernel void @ashr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
 %vec = load i32, i32 addrspace(1)* %vaddr
 %or = ashr i32 %vec, %b
 store i32 %or, i32 addrspace(1)* %out
@@ -51,7 +51,7 @@ define void @ashr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b)
 ; ALL: 'ashr_i64'
 ; FAST64: estimated cost of 2 for {{.*}} ashr i64
 ; SLOW64: estimated cost of 3 for {{.*}} ashr i64
-define void @ashr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 {
+define amdgpu_kernel void @ashr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 {
 %vec = load i64, i64 addrspace(1)* %vaddr
 %or = ashr i64 %vec, %b
 store i64 %or, i64 addrspace(1)* %out

View File

@@ -1,7 +1,7 @@
 ; RUN: opt -mtriple=amdgcn-- -analyze -divergence %s | FileCheck %s
 ; CHECK: DIVERGENT: %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0
-define void @ds_swizzle(i32 addrspace(1)* %out, i32 %src) #0 {
+define amdgpu_kernel void @ds_swizzle(i32 addrspace(1)* %out, i32 %src) #0 {
 %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0
 store i32 %swizzle, i32 addrspace(1)* %out, align 4
 ret void

View File

@@ -5,7 +5,7 @@
 ; CHECK: DIVERGENT: %tmp11 = load volatile float, float addrspace(1)* %tmp5, align 4
 ; The post dominator tree does not have a root node in this case
-define void @no_return_blocks(float addrspace(1)* noalias nocapture readonly %arg, float addrspace(1)* noalias nocapture readonly %arg1) #0 {
+define amdgpu_kernel void @no_return_blocks(float addrspace(1)* noalias nocapture readonly %arg, float addrspace(1)* noalias nocapture readonly %arg1) #0 {
 bb0:
 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
 %tmp2 = sext i32 %tmp to i64

View File

@@ -1,7 +1,7 @@
 ; RUN: opt %s -mtriple amdgcn-- -analyze -divergence | FileCheck %s
 ; CHECK: DIVERGENT: %tmp = cmpxchg volatile
-define void @unreachable_loop(i32 %tidx) #0 {
+define amdgpu_kernel void @unreachable_loop(i32 %tidx) #0 {
 entry:
 unreachable

View File

@@ -7,35 +7,35 @@ declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
 declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0
 ; CHECK: DIVERGENT: %id.x = call i32 @llvm.amdgcn.workitem.id.x()
-define void @workitem_id_x() #1 {
+define amdgpu_kernel void @workitem_id_x() #1 {
 %id.x = call i32 @llvm.amdgcn.workitem.id.x()
 store volatile i32 %id.x, i32 addrspace(1)* undef
 ret void
 }
 ; CHECK: DIVERGENT: %id.y = call i32 @llvm.amdgcn.workitem.id.y()
-define void @workitem_id_y() #1 {
+define amdgpu_kernel void @workitem_id_y() #1 {
 %id.y = call i32 @llvm.amdgcn.workitem.id.y()
 store volatile i32 %id.y, i32 addrspace(1)* undef
 ret void
 }
 ; CHECK: DIVERGENT: %id.z = call i32 @llvm.amdgcn.workitem.id.z()
-define void @workitem_id_z() #1 {
+define amdgpu_kernel void @workitem_id_z() #1 {
 %id.z = call i32 @llvm.amdgcn.workitem.id.z()
 store volatile i32 %id.z, i32 addrspace(1)* undef
 ret void
 }
 ; CHECK: DIVERGENT: %mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 0, i32 0)
-define void @mbcnt_lo() #1 {
+define amdgpu_kernel void @mbcnt_lo() #1 {
 %mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 0, i32 0)
 store volatile i32 %mbcnt.lo, i32 addrspace(1)* undef
 ret void
 }
 ; CHECK: DIVERGENT: %mbcnt.hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 0, i32 0)
-define void @mbcnt_hi() #1 {
+define amdgpu_kernel void @mbcnt_hi() #1 {
 %mbcnt.hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 0, i32 0)
 store volatile i32 %mbcnt.hi, i32 addrspace(1)* undef
 ret void

View File

@@ -13,7 +13,7 @@
 ; FUNC-LABEL: {{^}}local_address_load:
 ; SI: v_mov_b32_e{{32|64}} [[PTR:v[0-9]]]
 ; SI: ds_read_b32 v{{[0-9]+}}, [[PTR]]
-define void @local_address_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
+define amdgpu_kernel void @local_address_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
 entry:
 %0 = load i32, i32 addrspace(3)* %in
 store i32 %0, i32 addrspace(1)* %out
@@ -24,7 +24,7 @@ entry:
 ; SI: s_add_i32 [[SPTR:s[0-9]]]
 ; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
 ; SI: ds_read_b32 [[VPTR]]
-define void @local_address_gep(i32 addrspace(1)* %out, i32 addrspace(3)* %in, i32 %offset) {
+define amdgpu_kernel void @local_address_gep(i32 addrspace(1)* %out, i32 addrspace(3)* %in, i32 %offset) {
 entry:
 %0 = getelementptr i32, i32 addrspace(3)* %in, i32 %offset
 %1 = load i32, i32 addrspace(3)* %0
@@ -35,7 +35,7 @@ entry:
 ; FUNC-LABEL: {{^}}local_address_gep_const_offset:
 ; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
 ; SI: ds_read_b32 v{{[0-9]+}}, [[VPTR]] offset:4
-define void @local_address_gep_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
+define amdgpu_kernel void @local_address_gep_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
 entry:
 %0 = getelementptr i32, i32 addrspace(3)* %in, i32 1
 %1 = load i32, i32 addrspace(3)* %0
@@ -48,7 +48,7 @@ entry:
 ; SI: s_add_i32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
 ; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
 ; SI: ds_read_b32 [[VPTR]]
-define void @local_address_gep_large_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
+define amdgpu_kernel void @local_address_gep_large_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
 entry:
 %0 = getelementptr i32, i32 addrspace(3)* %in, i32 16385
 %1 = load i32, i32 addrspace(3)* %0
@@ -60,7 +60,7 @@ entry:
 ; SI: v_cmp_ne_u32
 ; SI-NOT: v_cmp_ne_u32
 ; SI: v_cndmask_b32
-define void @null_32bit_lds_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %lds) nounwind {
+define amdgpu_kernel void @null_32bit_lds_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %lds) nounwind {
 %cmp = icmp ne i32 addrspace(3)* %lds, null
 %x = select i1 %cmp, i32 123, i32 456
 store i32 %x, i32 addrspace(1)* %out
@@ -71,7 +71,7 @@ define void @null_32bit_lds_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %lds)
 ; SI: s_mul_i32
 ; SI-NEXT: s_add_i32
 ; SI: ds_read_b32
-define void @mul_32bit_ptr(float addrspace(1)* %out, [3 x float] addrspace(3)* %lds, i32 %tid) {
+define amdgpu_kernel void @mul_32bit_ptr(float addrspace(1)* %out, [3 x float] addrspace(3)* %lds, i32 %tid) {
 %ptr = getelementptr [3 x float], [3 x float] addrspace(3)* %lds, i32 %tid, i32 0
 %val = load float, float addrspace(3)* %ptr
 store float %val, float addrspace(1)* %out
@@ -83,7 +83,7 @@ define void @mul_32bit_ptr(float addrspace(1)* %out, [3 x float] addrspace(3)* %
 ; FUNC-LABEL: {{^}}infer_ptr_alignment_global_offset:
 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
 ; SI: ds_read_b32 v{{[0-9]+}}, [[REG]]
-define void @infer_ptr_alignment_global_offset(float addrspace(1)* %out, i32 %tid) {
+define amdgpu_kernel void @infer_ptr_alignment_global_offset(float addrspace(1)* %out, i32 %tid) {
 %val = load float, float addrspace(3)* @g_lds
 store float %val, float addrspace(1)* %out
 ret void
@@ -95,14 +95,14 @@ define void @infer_ptr_alignment_global_offset(float addrspace(1)* %out, i32 %ti
 ; FUNC-LABEL: {{^}}global_ptr:
 ; SI: ds_write_b32
-define void @global_ptr() nounwind {
+define amdgpu_kernel void @global_ptr() nounwind {
 store i32 addrspace(3)* getelementptr ([16383 x i32], [16383 x i32] addrspace(3)* @dst, i32 0, i32 16), i32 addrspace(3)* addrspace(3)* @ptr
 ret void
 }
 ; FUNC-LABEL: {{^}}local_address_store:
 ; SI: ds_write_b32
-define void @local_address_store(i32 addrspace(3)* %out, i32 %val) {
+define amdgpu_kernel void @local_address_store(i32 addrspace(3)* %out, i32 %val) {
 store i32 %val, i32 addrspace(3)* %out
 ret void
 }
@@ -111,7 +111,7 @@ define void @local_address_store(i32 addrspace(3)* %out, i32 %val) {
 ; SI: s_add_i32 [[SADDR:s[0-9]+]],
 ; SI: v_mov_b32_e32 [[ADDR:v[0-9]+]], [[SADDR]]
 ; SI: ds_write_b32 [[ADDR]], v{{[0-9]+}}
-define void @local_address_gep_store(i32 addrspace(3)* %out, i32, i32 %val, i32 %offset) {
+define amdgpu_kernel void @local_address_gep_store(i32 addrspace(3)* %out, i32, i32 %val, i32 %offset) {
 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 %offset
 store i32 %val, i32 addrspace(3)* %gep, align 4
 ret void
@@ -121,7 +121,7 @@ define void @local_address_gep_store(i32 addrspace(3)* %out, i32, i32 %val, i32
 ; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
 ; SI: v_mov_b32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}}
 ; SI: ds_write_b32 [[VPTR]], [[VAL]] offset:4
-define void @local_address_gep_const_offset_store(i32 addrspace(3)* %out, i32 %val) {
+define amdgpu_kernel void @local_address_gep_const_offset_store(i32 addrspace(3)* %out, i32 %val) {
 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 1
 store i32 %val, i32 addrspace(3)* %gep, align 4
 ret void
@@ -132,7 +132,7 @@ define void @local_address_gep_const_offset_store(i32 addrspace(3)* %out, i32 %v
 ; SI: s_add_i32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
 ; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
 ; SI: ds_write_b32 [[VPTR]], v{{[0-9]+$}}
-define void @local_address_gep_large_const_offset_store(i32 addrspace(3)* %out, i32 %val) {
+define amdgpu_kernel void @local_address_gep_large_const_offset_store(i32 addrspace(3)* %out, i32 %val) {
 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 16385
 store i32 %val, i32 addrspace(3)* %gep, align 4
 ret void

@ -4,7 +4,7 @@
# REQUIRES: global-isel
--- |
define void @global_addrspace(i32 addrspace(1)* %global0) { ret void }
define amdgpu_kernel void @global_addrspace(i32 addrspace(1)* %global0) { ret void }
...
---

@ -5,7 +5,7 @@
# REQUIRES: global-isel
--- |
define void @smrd_imm(i32 addrspace(2)* %const0) { ret void }
define amdgpu_kernel void @smrd_imm(i32 addrspace(2)* %const0) { ret void }
...
---

@ -4,7 +4,7 @@
# REQUIRES: global-isel
--- |
define void @global_addrspace(i32 addrspace(1)* %global0) { ret void }
define amdgpu_kernel void @global_addrspace(i32 addrspace(1)* %global0) { ret void }
...
---

@ -3,12 +3,12 @@
# REQUIRES: global-isel
--- |
define void @load_constant(i32 addrspace(2)* %ptr0) { ret void }
define void @load_global_uniform(i32 addrspace(1)* %ptr1) {
define amdgpu_kernel void @load_constant(i32 addrspace(2)* %ptr0) { ret void }
define amdgpu_kernel void @load_global_uniform(i32 addrspace(1)* %ptr1) {
%tmp0 = load i32, i32 addrspace(1)* %ptr1
ret void
}
define void @load_global_non_uniform(i32 addrspace(1)* %ptr2) {
define amdgpu_kernel void @load_global_non_uniform(i32 addrspace(1)* %ptr2) {
%tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0
%tmp1 = getelementptr i32, i32 addrspace(1)* %ptr2, i32 %tmp0
%tmp2 = load i32, i32 addrspace(1)* %tmp1

@ -9,7 +9,7 @@
; GCN-LABEL: {{^}}smrd0:
; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4
define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
define amdgpu_kernel void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 1
%1 = load i32, i32 addrspace(2)* %0
@ -21,7 +21,7 @@ entry:
; GCN-LABEL: {{^}}smrd1:
; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff,0x{{[0-9]+[137]}}
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
define amdgpu_kernel void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 255
%1 = load i32, i32 addrspace(2)* %0
@ -36,7 +36,7 @@ entry:
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
; GCN: s_endpgm
define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
define amdgpu_kernel void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 256
%1 = load i32, i32 addrspace(2)* %0
@ -51,7 +51,7 @@ entry:
; XSI: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0xb ; encoding: [0x0b
; TODO: Add VI checks
; XGCN: s_endpgm
define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
define amdgpu_kernel void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32
%1 = load i32, i32 addrspace(2)* %0
@ -65,7 +65,7 @@ entry:
; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
define void @smrd4(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
define amdgpu_kernel void @smrd4(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262143
%1 = load i32, i32 addrspace(2)* %0
@ -79,7 +79,7 @@ entry:
; SIVI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
; GCN: s_endpgm
define void @smrd5(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
define amdgpu_kernel void @smrd5(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262144
%1 = load i32, i32 addrspace(2)* %0

@ -3,7 +3,7 @@
; REQUIRES: asserts
; Check that SelectionDAGDumper does not crash on int_SI_if.
define void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) {
define amdgpu_kernel void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) {
entry:
%0 = icmp eq i64 %a, 0
br i1 %0, label %if, label %else

@ -6,7 +6,7 @@
; VI: flat_load_ushort [[B:v[0-9]+]]
; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; VI-NEXT: buffer_store_short [[ADD]]
define void @v_test_add_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
define amdgpu_kernel void @v_test_add_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
@ -23,7 +23,7 @@ define void @v_test_add_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in0, i16
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: v_add_u16_e32 [[ADD:v[0-9]+]], 0x7b, [[A]]
; VI-NEXT: buffer_store_short [[ADD]]
define void @v_test_add_i16_constant(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 {
define amdgpu_kernel void @v_test_add_i16_constant(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
@ -38,7 +38,7 @@ define void @v_test_add_i16_constant(i16 addrspace(1)* %out, i16 addrspace(1)* %
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: v_add_u16_e32 [[ADD:v[0-9]+]], 0xfffffcb3, [[A]]
; VI-NEXT: buffer_store_short [[ADD]]
define void @v_test_add_i16_neg_constant(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 {
define amdgpu_kernel void @v_test_add_i16_neg_constant(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
@ -53,7 +53,7 @@ define void @v_test_add_i16_neg_constant(i16 addrspace(1)* %out, i16 addrspace(1
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: v_add_u16_e32 [[ADD:v[0-9]+]], -1, [[A]]
; VI-NEXT: buffer_store_short [[ADD]]
define void @v_test_add_i16_inline_neg1(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 {
define amdgpu_kernel void @v_test_add_i16_inline_neg1(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
@ -69,7 +69,7 @@ define void @v_test_add_i16_inline_neg1(i16 addrspace(1)* %out, i16 addrspace(1)
; VI: flat_load_ushort [[B:v[0-9]+]]
; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; VI-NEXT: buffer_store_dword [[ADD]]
define void @v_test_add_i16_zext_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
define amdgpu_kernel void @v_test_add_i16_zext_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
@ -89,7 +89,7 @@ define void @v_test_add_i16_zext_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)
; VI-DAG: v_add_u16_e32 v[[ADD:[0-9]+]], [[B]], [[A]]
; VI-DAG: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0
; VI: buffer_store_dwordx2 v{{\[}}[[ADD]]:[[VZERO]]{{\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
define void @v_test_add_i16_zext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
define amdgpu_kernel void @v_test_add_i16_zext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i64, i64 addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
@ -109,7 +109,7 @@ define void @v_test_add_i16_zext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)
; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; VI-NEXT: v_bfe_i32 [[SEXT:v[0-9]+]], [[ADD]], 0, 16
; VI-NEXT: buffer_store_dword [[SEXT]]
define void @v_test_add_i16_sext_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
define amdgpu_kernel void @v_test_add_i16_sext_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
@ -130,7 +130,7 @@ define void @v_test_add_i16_sext_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)
; VI-NEXT: v_bfe_i32 v[[LO:[0-9]+]], [[ADD]], 0, 16
; VI-NEXT: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; VI-NEXT: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @v_test_add_i16_sext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
define amdgpu_kernel void @v_test_add_i16_sext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i64, i64 addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid

@ -8,7 +8,7 @@
;SI: v_add_i32_e32 [[REG:v[0-9]+]], vcc, {{v[0-9]+, v[0-9]+}}
;SI-NOT: [[REG]]
;SI: buffer_store_dword [[REG]],
define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
define amdgpu_kernel void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
%a = load i32, i32 addrspace(1)* %in
%b = load i32, i32 addrspace(1)* %b_ptr
@ -24,7 +24,7 @@ define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
define amdgpu_kernel void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
%a = load <2 x i32>, <2 x i32> addrspace(1)* %in
%b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
@ -44,7 +44,7 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
define amdgpu_kernel void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
%a = load <4 x i32>, <4 x i32> addrspace(1)* %in
%b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
@ -71,7 +71,7 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
; SI: s_add_i32
; SI: s_add_i32
; SI: s_add_i32
define void @test8(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) {
define amdgpu_kernel void @test8(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) {
entry:
%0 = add <8 x i32> %a, %b
store <8 x i32> %0, <8 x i32> addrspace(1)* %out
@ -112,7 +112,7 @@ entry:
; SI: s_add_i32
; SI: s_add_i32
; SI: s_add_i32
define void @test16(<16 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) {
define amdgpu_kernel void @test16(<16 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) {
entry:
%0 = add <16 x i32> %a, %b
store <16 x i32> %0, <16 x i32> addrspace(1)* %out
@ -129,7 +129,7 @@ entry:
; EG-DAG: ADD_INT
; EG-DAG: ADD_INT {{[* ]*}}
; EG-NOT: SUB
define void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
define amdgpu_kernel void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%0 = add i64 %a, %b
store i64 %0, i64 addrspace(1)* %out
@ -150,7 +150,7 @@ entry:
; EG-DAG: ADD_INT
; EG-DAG: ADD_INT {{[* ]*}}
; EG-NOT: SUB
define void @add64_sgpr_vgpr(i64 addrspace(1)* %out, i64 %a, i64 addrspace(1)* %in) {
define amdgpu_kernel void @add64_sgpr_vgpr(i64 addrspace(1)* %out, i64 %a, i64 addrspace(1)* %in) {
entry:
%0 = load i64, i64 addrspace(1)* %in
%1 = add i64 %a, %0
@ -169,7 +169,7 @@ entry:
; EG-DAG: ADD_INT
; EG-DAG: ADD_INT {{[* ]*}}
; EG-NOT: SUB
define void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) {
define amdgpu_kernel void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) {
entry:
%0 = icmp eq i64 %a, 0
br i1 %0, label %if, label %else

@ -7,7 +7,7 @@
; VI: v_add_u16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_add_u16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @v_test_add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
define amdgpu_kernel void @v_test_add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %tid
@ -27,7 +27,7 @@ define void @v_test_add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(
; VI: s_add_i32
; VI: s_add_i32
define void @s_test_add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %in0, <2 x i16> addrspace(2)* %in1) #1 {
define amdgpu_kernel void @s_test_add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %in0, <2 x i16> addrspace(2)* %in1) #1 {
%a = load <2 x i16>, <2 x i16> addrspace(2)* %in0
%b = load <2 x i16>, <2 x i16> addrspace(2)* %in1
%add = add <2 x i16> %a, %b
@ -41,7 +41,7 @@ define void @s_test_add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(
; VI: s_add_i32
; VI: s_add_i32
define void @s_test_add_self_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %in0) #1 {
define amdgpu_kernel void @s_test_add_self_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %in0) #1 {
%a = load <2 x i16>, <2 x i16> addrspace(2)* %in0
%add = add <2 x i16> %a, %a
store <2 x i16> %add, <2 x i16> addrspace(1)* %out
@ -54,7 +54,7 @@ define void @s_test_add_self_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrs
; VI: v_add_i32
; VI: v_add_i32
define void @s_test_add_v2i16_kernarg(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #1 {
define amdgpu_kernel void @s_test_add_v2i16_kernarg(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #1 {
%add = add <2 x i16> %a, %b
store <2 x i16> %add, <2 x i16> addrspace(1)* %out
ret void
@ -66,7 +66,7 @@ define void @s_test_add_v2i16_kernarg(<2 x i16> addrspace(1)* %out, <2 x i16> %a
; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0x7b, v{{[0-9]+}}
; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0x1c8, v{{[0-9]+}}
define void @v_test_add_v2i16_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
define amdgpu_kernel void @v_test_add_v2i16_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %tid
@ -83,7 +83,7 @@ define void @v_test_add_v2i16_constant(<2 x i16> addrspace(1)* %out, <2 x i16> a
; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0xfffffcb3, v{{[0-9]+}}
; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0xfffffc21, v{{[0-9]+}}
define void @v_test_add_v2i16_neg_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
define amdgpu_kernel void @v_test_add_v2i16_neg_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %tid
@ -102,7 +102,7 @@ define void @v_test_add_v2i16_neg_constant(<2 x i16> addrspace(1)* %out, <2 x i1
; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, -1, [[LOAD1]]
; VI-DAG: v_lshlrev_b32_e32 v{{[0-9]+}}, 16,
; VI: v_or_b32_e32
define void @v_test_add_v2i16_inline_neg1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
define amdgpu_kernel void @v_test_add_v2i16_inline_neg1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %tid
@ -121,7 +121,7 @@ define void @v_test_add_v2i16_inline_neg1(<2 x i16> addrspace(1)* %out, <2 x i16
; VI-NOT: v_add_u16
; VI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16,
; VI: v_or_b32_e32
define void @v_test_add_v2i16_inline_lo_zero_hi(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
define amdgpu_kernel void @v_test_add_v2i16_inline_lo_zero_hi(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %tid
@ -141,7 +141,7 @@ define void @v_test_add_v2i16_inline_lo_zero_hi(<2 x i16> addrspace(1)* %out, <2
; VI-NOT: v_add_u16
; VI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16,
; VI: v_or_b32_e32
define void @v_test_add_v2i16_inline_fp_split(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
define amdgpu_kernel void @v_test_add_v2i16_inline_fp_split(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %tid
@ -173,7 +173,7 @@ define void @v_test_add_v2i16_inline_fp_split(<2 x i16> addrspace(1)* %out, <2 x
; VI-NOT: and
; VI-NOT: shl
; VI: buffer_store_dwordx2 v{{\[}}[[ADD_LO]]:[[ADD_HI]]{{\]}}
define void @v_test_add_v2i16_zext_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
define amdgpu_kernel void @v_test_add_v2i16_zext_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %tid
@ -208,7 +208,7 @@ define void @v_test_add_v2i16_zext_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i
; VI: v_add_u16_e32
; VI: buffer_store_dwordx4
define void @v_test_add_v2i16_zext_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
define amdgpu_kernel void @v_test_add_v2i16_zext_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds <2 x i64>, <2 x i64> addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %tid
@ -236,7 +236,7 @@ define void @v_test_add_v2i16_zext_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i
; VI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16
; VI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16
; VI: buffer_store_dwordx2
define void @v_test_add_v2i16_sext_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
define amdgpu_kernel void @v_test_add_v2i16_sext_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %tid
@ -264,7 +264,7 @@ define void @v_test_add_v2i16_sext_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
define void @v_test_add_v2i16_sext_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
define amdgpu_kernel void @v_test_add_v2i16_sext_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds <2 x i64>, <2 x i64> addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %tid

@ -6,7 +6,7 @@
; GCN-NEXT: v_addc_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}, vcc
; GCN-NEXT: v_addc_u32_e32 v[[HI:[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}, vcc
; GCN: buffer_store_dwordx4 v{{\[}}[[LO]]:[[HI]]],
define void @test_i128_vreg(i128 addrspace(1)* noalias %out, i128 addrspace(1)* noalias %inA, i128 addrspace(1)* noalias %inB) {
define amdgpu_kernel void @test_i128_vreg(i128 addrspace(1)* noalias %out, i128 addrspace(1)* noalias %inA, i128 addrspace(1)* noalias %inB) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() readnone
%a_ptr = getelementptr i128, i128 addrspace(1)* %inA, i32 %tid
%b_ptr = getelementptr i128, i128 addrspace(1)* %inB, i32 %tid
@ -23,7 +23,7 @@ define void @test_i128_vreg(i128 addrspace(1)* noalias %out, i128 addrspace(1)*
; GCN: v_addc_u32
; GCN: v_addc_u32
; GCN: v_addc_u32
define void @sgpr_operand(i128 addrspace(1)* noalias %out, i128 addrspace(1)* noalias %in, i128 %a) {
define amdgpu_kernel void @sgpr_operand(i128 addrspace(1)* noalias %out, i128 addrspace(1)* noalias %in, i128 %a) {
%foo = load i128, i128 addrspace(1)* %in, align 8
%result = add i128 %foo, %a
store i128 %result, i128 addrspace(1)* %out
@ -35,7 +35,7 @@ define void @sgpr_operand(i128 addrspace(1)* noalias %out, i128 addrspace(1)* no
; GCN: v_addc_u32
; GCN: v_addc_u32
; GCN: v_addc_u32
define void @sgpr_operand_reversed(i128 addrspace(1)* noalias %out, i128 addrspace(1)* noalias %in, i128 %a) {
define amdgpu_kernel void @sgpr_operand_reversed(i128 addrspace(1)* noalias %out, i128 addrspace(1)* noalias %in, i128 %a) {
%foo = load i128, i128 addrspace(1)* %in, align 8
%result = add i128 %a, %foo
store i128 %result, i128 addrspace(1)* %out
@ -47,7 +47,7 @@ define void @sgpr_operand_reversed(i128 addrspace(1)* noalias %out, i128 addrspa
; GCN: s_addc_u32
; GCN: s_addc_u32
; GCN: s_addc_u32
define void @test_sreg(i128 addrspace(1)* noalias %out, i128 %a, i128 %b) {
define amdgpu_kernel void @test_sreg(i128 addrspace(1)* noalias %out, i128 %a, i128 %b) {
%result = add i128 %a, %b
store i128 %result, i128 addrspace(1)* %out
ret void

@ -6,7 +6,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() readnone
; SI-LABEL: {{^}}test_i64_vreg:
; SI: v_add_i32
; SI: v_addc_u32
define void @test_i64_vreg(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) {
define amdgpu_kernel void @test_i64_vreg(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() readnone
%a_ptr = getelementptr i64, i64 addrspace(1)* %inA, i32 %tid
%b_ptr = getelementptr i64, i64 addrspace(1)* %inB, i32 %tid
@ -21,7 +21,7 @@ define void @test_i64_vreg(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noa
; SI-LABEL: {{^}}sgpr_operand:
; SI: v_add_i32
; SI: v_addc_u32
define void @sgpr_operand(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 addrspace(1)* noalias %in_bar, i64 %a) {
define amdgpu_kernel void @sgpr_operand(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 addrspace(1)* noalias %in_bar, i64 %a) {
%foo = load i64, i64 addrspace(1)* %in, align 8
%result = add i64 %foo, %a
store i64 %result, i64 addrspace(1)* %out
@ -34,7 +34,7 @@ define void @sgpr_operand(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noal
; SI-LABEL: {{^}}sgpr_operand_reversed:
; SI: v_add_i32
; SI: v_addc_u32
define void @sgpr_operand_reversed(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %a) {
define amdgpu_kernel void @sgpr_operand_reversed(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %a) {
%foo = load i64, i64 addrspace(1)* %in, align 8
%result = add i64 %a, %foo
store i64 %result, i64 addrspace(1)* %out
@ -47,7 +47,7 @@ define void @sgpr_operand_reversed(i64 addrspace(1)* noalias %out, i64 addrspace
; SI: s_addc_u32
; SI: s_add_u32
; SI: s_addc_u32
define void @test_v2i64_sreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> %a, <2 x i64> %b) {
define amdgpu_kernel void @test_v2i64_sreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> %a, <2 x i64> %b) {
%result = add <2 x i64> %a, %b
store <2 x i64> %result, <2 x i64> addrspace(1)* %out
ret void
@ -58,7 +58,7 @@ define void @test_v2i64_sreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> %a,
; SI: v_addc_u32
; SI: v_add_i32
; SI: v_addc_u32
define void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) {
define amdgpu_kernel void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() readnone
%a_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inA, i32 %tid
%b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inB, i32 %tid
@ -76,7 +76,7 @@ define void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> add
; SI-NOT: addc
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: buffer_store_dword [[VRESULT]],
define void @trunc_i64_add_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
define amdgpu_kernel void @trunc_i64_add_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
%add = add i64 %b, %a
%trunc = trunc i64 %add to i32
store i32 %trunc, i32 addrspace(1)* %out, align 8

@ -9,7 +9,7 @@ declare void @consume_ptr2int(i32) #0
; CHECK: %cast = addrspacecast i32* %data to i32 addrspace(4)*
; CHECK: %ptr2int = ptrtoint i32 addrspace(4)* %cast to i32
; CHECK: store i32 %ptr2int, i32 addrspace(1)* %out
define void @addrspacecast_captured(i32 addrspace(1)* %out) #0 {
define amdgpu_kernel void @addrspacecast_captured(i32 addrspace(1)* %out) #0 {
entry:
%data = alloca i32, align 4
%cast = addrspacecast i32* %data to i32 addrspace(4)*
@ -22,7 +22,7 @@ entry:
; CHECK: %data = alloca i32, align 4
; CHECK: %cast = addrspacecast i32* %data to i32 addrspace(4)*
; CHECK: store i32 addrspace(4)* %cast, i32 addrspace(4)* addrspace(1)* %out
define void @addrspacecast_captured_store(i32 addrspace(4)* addrspace(1)* %out) #0 {
define amdgpu_kernel void @addrspacecast_captured_store(i32 addrspace(4)* addrspace(1)* %out) #0 {
entry:
%data = alloca i32, align 4
%cast = addrspacecast i32* %data to i32 addrspace(4)*
@ -35,7 +35,7 @@ entry:
; CHECK: %cast = addrspacecast i32* %data to i32 addrspace(4)*
; CHECK: %ptr2int = ptrtoint i32 addrspace(4)* %cast to i32
; CHECK: call void @consume_ptr2int(i32 %ptr2int)
define void @addrspacecast_captured_call() #0 {
define amdgpu_kernel void @addrspacecast_captured_call() #0 {
entry:
%data = alloca i32, align 4
%cast = addrspacecast i32* %data to i32 addrspace(4)*

@ -9,57 +9,57 @@ declare void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* nocapture, i32 addrs
@global.arr = unnamed_addr addrspace(1) global [256 x i32] undef, align 4
; HSA: @store_cast_0_flat_to_group_addrspacecast() #1
define void @store_cast_0_flat_to_group_addrspacecast() #1 {
define amdgpu_kernel void @store_cast_0_flat_to_group_addrspacecast() #1 {
store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*)
ret void
}
; HSA: @store_cast_0_group_to_flat_addrspacecast() #2
define void @store_cast_0_group_to_flat_addrspacecast() #1 {
define amdgpu_kernel void @store_cast_0_group_to_flat_addrspacecast() #1 {
store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*)
ret void
}
; HSA: define void @store_constant_cast_group_gv_to_flat() #2
define void @store_constant_cast_group_gv_to_flat() #1 {
; HSA: define amdgpu_kernel void @store_constant_cast_group_gv_to_flat() #2
define amdgpu_kernel void @store_constant_cast_group_gv_to_flat() #1 {
store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds.i32 to i32 addrspace(4)*)
ret void
}
; HSA: @store_constant_cast_group_gv_gep_to_flat() #2
define void @store_constant_cast_group_gv_gep_to_flat() #1 {
define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat() #1 {
store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8)
ret void
}
; HSA: @store_constant_cast_global_gv_to_flat() #1
define void @store_constant_cast_global_gv_to_flat() #1 {
define amdgpu_kernel void @store_constant_cast_global_gv_to_flat() #1 {
store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global.i32 to i32 addrspace(4)*)
ret void
}
; HSA: @store_constant_cast_global_gv_gep_to_flat() #1
define void @store_constant_cast_global_gv_gep_to_flat() #1 {
define amdgpu_kernel void @store_constant_cast_global_gv_gep_to_flat() #1 {
store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(1)* @global.arr to [256 x i32] addrspace(4)*), i64 0, i64 8)
ret void
}
; HSA: @load_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #2
define void @load_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
define amdgpu_kernel void @load_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
%val = load i32, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8)
store i32 %val, i32 addrspace(1)* %out
ret void
}
; HSA: @atomicrmw_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #2
define void @atomicrmw_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
define amdgpu_kernel void @atomicrmw_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
%val = atomicrmw add i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 1 seq_cst
store i32 %val, i32 addrspace(1)* %out
ret void
}
; HSA: @cmpxchg_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #2
define void @cmpxchg_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
define amdgpu_kernel void @cmpxchg_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
%val = cmpxchg i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst
%val0 = extractvalue { i32, i1 } %val, 0
store i32 %val0, i32 addrspace(1)* %out
@ -67,28 +67,28 @@ define void @cmpxchg_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out)
}
; HSA: @memcpy_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #2
define void @memcpy_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
define amdgpu_kernel void @memcpy_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* %out, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i32 4, i1 false)
ret void
}
; Can't just search the pointer value
; HSA: @store_value_constant_cast_lds_gv_gep_to_flat(i32 addrspace(4)* addrspace(1)* %out) #2
define void @store_value_constant_cast_lds_gv_gep_to_flat(i32 addrspace(4)* addrspace(1)* %out) #1 {
define amdgpu_kernel void @store_value_constant_cast_lds_gv_gep_to_flat(i32 addrspace(4)* addrspace(1)* %out) #1 {
store i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 addrspace(4)* addrspace(1)* %out
ret void
}
; Can't just search pointer types
; HSA: @store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat(i64 addrspace(1)* %out) #2
define void @store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat(i64 addrspace(1)* %out) #1 {
define amdgpu_kernel void @store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat(i64 addrspace(1)* %out) #1 {
store i64 ptrtoint (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i64), i64 addrspace(1)* %out
ret void
}
; Cast group to flat, do GEP, cast back to group
; HSA: @store_constant_cast_group_gv_gep_to_flat_to_group() #2
define void @store_constant_cast_group_gv_gep_to_flat_to_group() #1 {
define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat_to_group() #1 {
store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*)
ret void
}

@ -28,7 +28,7 @@
; CI: NumSgprs: {{[0-9][0-9]+}}
; GFX9: NumSgprs: {{[0-9]+}}
define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 {
define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 {
%stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
store volatile i32 7, i32 addrspace(4)* %stof
ret void
@ -58,7 +58,7 @@ define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 {
; CI: NumSgprs: {{[0-9][0-9]+}}
; GFX9: NumSgprs: {{[0-9]+}}
define void @use_private_to_flat_addrspacecast(i32* %ptr) #0 {
define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32* %ptr) #0 {
%stof = addrspacecast i32* %ptr to i32 addrspace(4)*
store volatile i32 7, i32 addrspace(4)* %stof
ret void
@ -73,7 +73,7 @@ define void @use_private_to_flat_addrspacecast(i32* %ptr) #0 {
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
; HSA: flat_store_dword v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}, [[K]]
define void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #0 {
define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #0 {
%stof = addrspacecast i32 addrspace(1)* %ptr to i32 addrspace(4)*
store volatile i32 7, i32 addrspace(4)* %stof
ret void
@ -85,7 +85,7 @@ define void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #0 {
; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA: flat_load_dword v{{[0-9]+}}, v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}
define void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #0 {
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #0 {
%stof = addrspacecast i32 addrspace(2)* %ptr to i32 addrspace(4)*
%ld = load volatile i32, i32 addrspace(4)* %stof
ret void
@ -102,7 +102,7 @@ define void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #0 {
; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]]
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
; HSA: ds_write_b32 [[CASTPTR]], v[[K]]
define void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #0 {
define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #0 {
%ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(3)*
store volatile i32 0, i32 addrspace(3)* %ftos
ret void
@ -119,7 +119,7 @@ define void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #0 {
; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], 0, v[[VPTR_LO]]
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
; HSA: buffer_store_dword v[[K]], [[CASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
define void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #0 {
define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #0 {
%ftos = addrspacecast i32 addrspace(4)* %ptr to i32*
store volatile i32 0, i32* %ftos
ret void
@ -133,7 +133,7 @@ define void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #0 {
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0
; HSA: flat_store_dword v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}, [[K]]
define void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #0 {
define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #0 {
%ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(1)*
store volatile i32 0, i32 addrspace(1)* %ftos
ret void
@ -144,7 +144,7 @@ define void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #0 {
; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}, s[4:5], 0x0
; HSA: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}}, 0x0
define void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #0 {
define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #0 {
%ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(2)*
load volatile i32, i32 addrspace(2)* %ftos
ret void
@ -158,7 +158,7 @@ define void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #0 {
; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
define void @cast_0_group_to_flat_addrspacecast() #0 {
define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 {
%cast = addrspacecast i32 addrspace(3)* null to i32 addrspace(4)*
store volatile i32 7, i32 addrspace(4)* %cast
ret void
@ -168,7 +168,7 @@ define void @cast_0_group_to_flat_addrspacecast() #0 {
; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: ds_write_b32 [[PTR]], [[K]]
define void @cast_0_flat_to_group_addrspacecast() #0 {
define amdgpu_kernel void @cast_0_flat_to_group_addrspacecast() #0 {
%cast = addrspacecast i32 addrspace(4)* null to i32 addrspace(3)*
store volatile i32 7, i32 addrspace(3)* %cast
ret void
@ -179,7 +179,7 @@ define void @cast_0_flat_to_group_addrspacecast() #0 {
; HSA: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
define void @cast_neg1_group_to_flat_addrspacecast() #0 {
define amdgpu_kernel void @cast_neg1_group_to_flat_addrspacecast() #0 {
%cast = addrspacecast i32 addrspace(3)* inttoptr (i32 -1 to i32 addrspace(3)*) to i32 addrspace(4)*
store volatile i32 7, i32 addrspace(4)* %cast
ret void
@ -189,7 +189,7 @@ define void @cast_neg1_group_to_flat_addrspacecast() #0 {
; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: ds_write_b32 [[PTR]], [[K]]
define void @cast_neg1_flat_to_group_addrspacecast() #0 {
define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 {
%cast = addrspacecast i32 addrspace(4)* inttoptr (i64 -1 to i32 addrspace(4)*) to i32 addrspace(3)*
store volatile i32 7, i32 addrspace(3)* %cast
ret void
@ -204,7 +204,7 @@ define void @cast_neg1_flat_to_group_addrspacecast() #0 {
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
define void @cast_0_private_to_flat_addrspacecast() #0 {
define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 {
%cast = addrspacecast i32* null to i32 addrspace(4)*
store volatile i32 7, i32 addrspace(4)* %cast
ret void
@ -214,7 +214,7 @@ define void @cast_0_private_to_flat_addrspacecast() #0 {
; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: buffer_store_dword [[K]], [[PTR]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
define void @cast_0_flat_to_private_addrspacecast() #0 {
define amdgpu_kernel void @cast_0_flat_to_private_addrspacecast() #0 {
%cast = addrspacecast i32 addrspace(4)* null to i32 addrspace(0)*
store volatile i32 7, i32* %cast
ret void
@ -226,7 +226,7 @@ define void @cast_0_flat_to_private_addrspacecast() #0 {
; HSA-LABEL: {{^}}branch_use_flat_i32:
; HSA: flat_store_dword {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}
; HSA: s_endpgm
define void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 {
define amdgpu_kernel void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 {
entry:
%cmp = icmp ne i32 %c, 0
br i1 %cmp, label %local, label %global
@ -259,7 +259,7 @@ end:
; HSA: flat_store_dword
; HSA: s_barrier
; HSA: flat_load_dword
define void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 {
define amdgpu_kernel void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 {
%alloca = alloca i32, i32 9, align 4
%x = call i32 @llvm.amdgcn.workitem.id.x() #2
%pptr = getelementptr i32, i32* %alloca, i32 %x

@ -16,7 +16,7 @@ entry:
; FUNC-LABEL: {{^}}i8ptr_v16i8ptr:
; SI: s_endpgm
define void @i8ptr_v16i8ptr(<16 x i8> addrspace(1)* %out, i8 addrspace(1)* %in) {
define amdgpu_kernel void @i8ptr_v16i8ptr(<16 x i8> addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
%0 = bitcast i8 addrspace(1)* %in to <16 x i8> addrspace(1)*
%1 = load <16 x i8>, <16 x i8> addrspace(1)* %0
@ -24,7 +24,7 @@ entry:
ret void
}
define void @f32_to_v2i16(<2 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
define amdgpu_kernel void @f32_to_v2i16(<2 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
%load = load float, float addrspace(1)* %in, align 4
%fadd32 = fadd float %load, 1.0
%bc = bitcast float %fadd32 to <2 x i16>
@ -33,7 +33,7 @@ define void @f32_to_v2i16(<2 x i16> addrspace(1)* %out, float addrspace(1)* %in)
ret void
}
define void @v2i16_to_f32(float addrspace(1)* %out, <2 x i16> addrspace(1)* %in) nounwind {
define amdgpu_kernel void @v2i16_to_f32(float addrspace(1)* %out, <2 x i16> addrspace(1)* %in) nounwind {
%load = load <2 x i16>, <2 x i16> addrspace(1)* %in, align 4
%add.v2i16 = add <2 x i16> %load, <i16 2, i16 2>
%bc = bitcast <2 x i16> %add.v2i16 to float
@ -42,7 +42,7 @@ define void @v2i16_to_f32(float addrspace(1)* %out, <2 x i16> addrspace(1)* %in)
ret void
}
define void @f32_to_v2f16(<2 x half> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
define amdgpu_kernel void @f32_to_v2f16(<2 x half> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
%load = load float, float addrspace(1)* %in, align 4
%fadd32 = fadd float %load, 1.0
%bc = bitcast float %fadd32 to <2 x half>
@ -51,7 +51,7 @@ define void @f32_to_v2f16(<2 x half> addrspace(1)* %out, float addrspace(1)* %in
ret void
}
define void @v2f16_to_f32(float addrspace(1)* %out, <2 x half> addrspace(1)* %in) nounwind {
define amdgpu_kernel void @v2f16_to_f32(float addrspace(1)* %out, <2 x half> addrspace(1)* %in) nounwind {
%load = load <2 x half>, <2 x half> addrspace(1)* %in, align 4
%add.v2f16 = fadd <2 x half> %load, <half 2.0, half 2.0>
%bc = bitcast <2 x half> %add.v2f16 to float
@ -60,14 +60,14 @@ define void @v2f16_to_f32(float addrspace(1)* %out, <2 x half> addrspace(1)* %in
ret void
}
define void @v4i8_to_i32(i32 addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
define amdgpu_kernel void @v4i8_to_i32(i32 addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
%load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
%bc = bitcast <4 x i8> %load to i32
store i32 %bc, i32 addrspace(1)* %out, align 4
ret void
}
define void @i32_to_v4i8(<4 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
define amdgpu_kernel void @i32_to_v4i8(<4 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%load = load i32, i32 addrspace(1)* %in, align 4
%bc = bitcast i32 %load to <4 x i8>
store <4 x i8> %bc, <4 x i8> addrspace(1)* %out, align 4
@ -76,7 +76,7 @@ define void @i32_to_v4i8(<4 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nou
; FUNC-LABEL: {{^}}bitcast_v2i32_to_f64:
; SI: s_endpgm
define void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
define amdgpu_kernel void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8
%add = add <2 x i32> %val, <i32 4, i32 9>
%bc = bitcast <2 x i32> %add to double
@ -87,7 +87,7 @@ define void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace
; FUNC-LABEL: {{^}}bitcast_f64_to_v2i32:
; SI: s_endpgm
define void @bitcast_f64_to_v2i32(<2 x i32> addrspace(1)* %out, double addrspace(1)* %in) {
define amdgpu_kernel void @bitcast_f64_to_v2i32(<2 x i32> addrspace(1)* %out, double addrspace(1)* %in) {
%val = load double, double addrspace(1)* %in, align 8
%add = fadd double %val, 4.0
%bc = bitcast double %add to <2 x i32>
@ -96,7 +96,7 @@ define void @bitcast_f64_to_v2i32(<2 x i32> addrspace(1)* %out, double addrspace
}
; FUNC-LABEL: {{^}}bitcast_v2i64_to_v2f64:
define void @bitcast_v2i64_to_v2f64(i32 %cond, <2 x double> addrspace(1)* %out, <2 x i64> %value) {
define amdgpu_kernel void @bitcast_v2i64_to_v2f64(i32 %cond, <2 x double> addrspace(1)* %out, <2 x i64> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
@ -112,7 +112,7 @@ end:
}
; FUNC-LABEL: {{^}}bitcast_v2f64_to_v2i64:
define void @bitcast_v2f64_to_v2i64(i32 %cond, <2 x i64> addrspace(1)* %out, <2 x double> %value) {
define amdgpu_kernel void @bitcast_v2f64_to_v2i64(i32 %cond, <2 x i64> addrspace(1)* %out, <2 x double> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end

@ -15,7 +15,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; GCN-NOT: v0
; GCN: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, v0, v{{[0-9]+}}
; GCN: buffer_store_dword [[RESULT]]
define void @work_item_info(i32 addrspace(1)* %out, i32 %in) {
define amdgpu_kernel void @work_item_info(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = alloca [2 x i32]
%1 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 0

@ -4,7 +4,7 @@
; NOOP-LABEL: @noop_fdiv_fpmath(
; NOOP: %md.25ulp = fdiv float %a, %b, !fpmath !0
define void @noop_fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #3 {
define amdgpu_kernel void @noop_fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #3 {
%md.25ulp = fdiv float %a, %b, !fpmath !0
store volatile float %md.25ulp, float addrspace(1)* %out
ret void
@ -18,7 +18,7 @@ define void @noop_fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #3 {
; CHECK: %md.3ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !3
; CHECK: %fast.md.25ulp = call fast float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0
; CHECK: %arcp.md.25ulp = call arcp float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0
define void @fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #1 {
define amdgpu_kernel void @fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #1 {
%no.md = fdiv float %a, %b
store volatile float %no.md, float addrspace(1)* %out
@ -51,7 +51,7 @@ define void @fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #1 {
; CHECK: %arcp.25ulp = fdiv arcp float 1.000000e+00, %x, !fpmath !0
; CHECK: %fast.no.md = fdiv fast float 1.000000e+00, %x{{$}}
; CHECK: %fast.25ulp = fdiv fast float 1.000000e+00, %x, !fpmath !0
define void @rcp_fdiv_fpmath(float addrspace(1)* %out, float %x) #1 {
define amdgpu_kernel void @rcp_fdiv_fpmath(float addrspace(1)* %out, float %x) #1 {
%no.md = fdiv float 1.0, %x
store volatile float %no.md, float addrspace(1)* %out
@ -89,7 +89,7 @@ define void @rcp_fdiv_fpmath(float addrspace(1)* %out, float %x) #1 {
; CHECK: %[[B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
; CHECK: %[[FDIV1:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A1]], float %[[B1]]), !fpmath !0
; CHECK: %md.25ulp = insertelement <2 x float> %[[INS0]], float %[[FDIV1]], i64 1
define void @fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #1 {
define amdgpu_kernel void @fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #1 {
%no.md = fdiv <2 x float> %a, %b
store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out
@ -120,7 +120,7 @@ define void @fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %a,
; CHECK: fdiv fast float 1.000000e+00, %{{[0-9]+}}, !fpmath !0
; CHECK: fdiv fast float 1.000000e+00, %{{[0-9]+}}, !fpmath !0
; CHECK: store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out
define void @rcp_fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 {
define amdgpu_kernel void @rcp_fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 {
%no.md = fdiv <2 x float> <float 1.0, float 1.0>, %x
store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out
@ -158,7 +158,7 @@ define void @rcp_fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float>
; CHECK: %[[X1:[0-9]+]] = extractelement <2 x float> %x, i64 1
; CHECK: fdiv fast float 2.000000e+00, %[[X1]], !fpmath !0
; CHECK: store volatile <2 x float> %fast.25ulp
define void @rcp_fdiv_fpmath_vector_nonsplat(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 {
define amdgpu_kernel void @rcp_fdiv_fpmath_vector_nonsplat(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 {
%no.md = fdiv <2 x float> <float 1.0, float 2.0>, %x
store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out
@ -186,7 +186,7 @@ define void @rcp_fdiv_fpmath_vector_nonsplat(<2 x float> addrspace(1)* %out, <2
; CHECK: call fast float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0
; CHECK: call fast float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0
; CHECK: store volatile <2 x float> %fast.25ulp
define void @rcp_fdiv_fpmath_vector_partial_constant(<2 x float> addrspace(1)* %out, <2 x float> %x, <2 x float> %y) #1 {
define amdgpu_kernel void @rcp_fdiv_fpmath_vector_partial_constant(<2 x float> addrspace(1)* %out, <2 x float> %x, <2 x float> %y) #1 {
%x.insert = insertelement <2 x float> %x, float 1.0, i32 0
%arcp.25ulp = fdiv arcp <2 x float> %x.insert, %y, !fpmath !0
@ -206,7 +206,7 @@ define void @rcp_fdiv_fpmath_vector_partial_constant(<2 x float> addrspace(1)* %
; CHECK: %md.3ulp = fdiv float %a, %b, !fpmath !3
; CHECK: call fast float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0
; CHECK: call arcp float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0
define void @fdiv_fpmath_f32_denormals(float addrspace(1)* %out, float %a, float %b) #2 {
define amdgpu_kernel void @fdiv_fpmath_f32_denormals(float addrspace(1)* %out, float %a, float %b) #2 {
%no.md = fdiv float %a, %b
store volatile float %no.md, float addrspace(1)* %out

@ -80,7 +80,7 @@
; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.x(), !range !0
; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.y(), !range !0
; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.z(), !range !0
define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
define amdgpu_kernel void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
entry:
%stack = alloca [5 x i32], align 4
%0 = load i32, i32 addrspace(1)* %in, align 4
@ -102,7 +102,7 @@ entry:
; OPT-LABEL: @high_alignment(
; OPT: getelementptr inbounds [256 x [8 x i32]], [256 x [8 x i32]] addrspace(3)* @high_alignment.stack, i32 0, i32 %{{[0-9]+}}
define void @high_alignment(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
define amdgpu_kernel void @high_alignment(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
entry:
%stack = alloca [8 x i32], align 16
%0 = load i32, i32 addrspace(1)* %in, align 4
@ -127,7 +127,7 @@ entry:
; OPT: alloca [5 x i32]
; SI-NOT: ds_write
define void @no_replace_inbounds_gep(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
define amdgpu_kernel void @no_replace_inbounds_gep(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
entry:
%stack = alloca [5 x i32], align 4
%0 = load i32, i32 addrspace(1)* %in, align 4
@ -162,7 +162,7 @@ entry:
; SI-NOT: v_movrel
%struct.point = type { i32, i32 }
define void @multiple_structs(i32 addrspace(1)* %out) #0 {
define amdgpu_kernel void @multiple_structs(i32 addrspace(1)* %out) #0 {
entry:
%a = alloca %struct.point
%b = alloca %struct.point
@ -191,7 +191,7 @@ entry:
; R600-NOT: MOVA_INT
; SI-NOT: v_movrel
define void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
define amdgpu_kernel void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
entry:
%prv_array_const = alloca [2 x i32]
%prv_array = alloca [2 x i32]
@ -235,7 +235,7 @@ for.end:
; SI-PROMOTE: s_load_dword [[IDX:s[0-9]+]]
; SI-PROMOTE: s_lshl_b32 [[SCALED_IDX:s[0-9]+]], [[IDX]], 16
; SI-PROMOTE: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[SCALED_IDX]], 16
define void @short_array(i32 addrspace(1)* %out, i32 %index) #0 {
define amdgpu_kernel void @short_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
%0 = alloca [2 x i16]
%1 = getelementptr inbounds [2 x i16], [2 x i16]* %0, i32 0, i32 0
@ -258,7 +258,7 @@ entry:
; SI-ALLOCA-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4 ; encoding: [0x04,0x00,0x60,0xe0
; SI-ALLOCA-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:5 ; encoding: [0x05,0x00,0x60,0xe0
define void @char_array(i32 addrspace(1)* %out, i32 %index) #0 {
define amdgpu_kernel void @char_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
%0 = alloca [2 x i8]
%1 = getelementptr inbounds [2 x i8], [2 x i8]* %0, i32 0, i32 0
@ -281,7 +281,7 @@ entry:
;
; A total of 5 bytes should be allocated and used.
; SI: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4 ;
define void @no_overlap(i32 addrspace(1)* %out, i32 %in) #0 {
define amdgpu_kernel void @no_overlap(i32 addrspace(1)* %out, i32 %in) #0 {
entry:
%0 = alloca [3 x i8], align 1
%1 = alloca [2 x i8], align 1
@ -305,7 +305,7 @@ entry:
ret void
}
define void @char_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
define amdgpu_kernel void @char_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
%alloca = alloca [2 x [2 x i8]]
%gep0 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 0
@ -319,7 +319,7 @@ entry:
ret void
}
define void @i32_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
define amdgpu_kernel void @i32_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
%alloca = alloca [2 x [2 x i32]]
%gep0 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 0
@ -332,7 +332,7 @@ entry:
ret void
}
define void @i64_array_array(i64 addrspace(1)* %out, i32 %index) #0 {
define amdgpu_kernel void @i64_array_array(i64 addrspace(1)* %out, i32 %index) #0 {
entry:
%alloca = alloca [2 x [2 x i64]]
%gep0 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 0
@ -347,7 +347,7 @@ entry:
%struct.pair32 = type { i32, i32 }
define void @struct_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
define amdgpu_kernel void @struct_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
%alloca = alloca [2 x [2 x %struct.pair32]]
%gep0 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 0, i32 1
@ -360,7 +360,7 @@ entry:
ret void
}
define void @struct_pair32_array(i32 addrspace(1)* %out, i32 %index) #0 {
define amdgpu_kernel void @struct_pair32_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
%alloca = alloca [2 x %struct.pair32]
%gep0 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32]* %alloca, i32 0, i32 0, i32 1
@ -373,7 +373,7 @@ entry:
ret void
}
define void @select_private(i32 addrspace(1)* %out, i32 %in) nounwind {
define amdgpu_kernel void @select_private(i32 addrspace(1)* %out, i32 %in) nounwind {
entry:
%tmp = alloca [2 x i32]
%tmp1 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
@ -394,7 +394,7 @@ entry:
; SI-NOT: ds_write
; SI: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
; SI: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:5 ;
define void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
define amdgpu_kernel void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
%alloca = alloca [16 x i32]
%tmp0 = getelementptr [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
store i32 5, i32* %tmp0
@ -410,7 +410,7 @@ define void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
; OPT-LABEL: @pointer_typed_alloca(
; OPT: getelementptr inbounds [256 x i32 addrspace(1)*], [256 x i32 addrspace(1)*] addrspace(3)* @pointer_typed_alloca.A.addr, i32 0, i32 %{{[0-9]+}}
; OPT: load i32 addrspace(1)*, i32 addrspace(1)* addrspace(3)* %{{[0-9]+}}, align 4
define void @pointer_typed_alloca(i32 addrspace(1)* %A) {
define amdgpu_kernel void @pointer_typed_alloca(i32 addrspace(1)* %A) {
entry:
%A.addr = alloca i32 addrspace(1)*, align 4
store i32 addrspace(1)* %A, i32 addrspace(1)** %A.addr, align 4
@ -462,7 +462,7 @@ entry:
; SI: buffer_load_dword
; SI: buffer_load_dword
define void @v16i32_stack(<16 x i32> addrspace(1)* %out, i32 %a) {
define amdgpu_kernel void @v16i32_stack(<16 x i32> addrspace(1)* %out, i32 %a) {
%alloca = alloca [2 x <16 x i32>]
%tmp0 = getelementptr [2 x <16 x i32>], [2 x <16 x i32>]* %alloca, i32 0, i32 %a
%tmp5 = load <16 x i32>, <16 x i32>* %tmp0
@ -506,7 +506,7 @@ define void @v16i32_stack(<16 x i32> addrspace(1)* %out, i32 %a) {
; SI: buffer_load_dword
; SI: buffer_load_dword
define void @v16float_stack(<16 x float> addrspace(1)* %out, i32 %a) {
define amdgpu_kernel void @v16float_stack(<16 x float> addrspace(1)* %out, i32 %a) {
%alloca = alloca [2 x <16 x float>]
%tmp0 = getelementptr [2 x <16 x float>], [2 x <16 x float>]* %alloca, i32 0, i32 %a
%tmp5 = load <16 x float>, <16 x float>* %tmp0
@ -522,7 +522,7 @@ define void @v16float_stack(<16 x float> addrspace(1)* %out, i32 %a) {
; SI: buffer_load_dword
; SI: buffer_load_dword
define void @v2float_stack(<2 x float> addrspace(1)* %out, i32 %a) {
define amdgpu_kernel void @v2float_stack(<2 x float> addrspace(1)* %out, i32 %a) {
%alloca = alloca [16 x <2 x float>]
%tmp0 = getelementptr [16 x <2 x float>], [16 x <2 x float>]* %alloca, i32 0, i32 %a
%tmp5 = load <2 x float>, <2 x float>* %tmp0
@ -533,7 +533,7 @@ define void @v2float_stack(<2 x float> addrspace(1)* %out, i32 %a) {
; OPT-LABEL: @direct_alloca_read_0xi32(
; OPT: store [0 x i32] undef, [0 x i32] addrspace(3)*
; OPT: load [0 x i32], [0 x i32] addrspace(3)*
define void @direct_alloca_read_0xi32([0 x i32] addrspace(1)* %out, i32 %index) {
define amdgpu_kernel void @direct_alloca_read_0xi32([0 x i32] addrspace(1)* %out, i32 %index) {
entry:
%tmp = alloca [0 x i32]
store [0 x i32] [], [0 x i32]* %tmp
@ -545,7 +545,7 @@ entry:
; OPT-LABEL: @direct_alloca_read_1xi32(
; OPT: store [1 x i32] zeroinitializer, [1 x i32] addrspace(3)*
; OPT: load [1 x i32], [1 x i32] addrspace(3)*
define void @direct_alloca_read_1xi32([1 x i32] addrspace(1)* %out, i32 %index) {
define amdgpu_kernel void @direct_alloca_read_1xi32([1 x i32] addrspace(1)* %out, i32 %index) {
entry:
%tmp = alloca [1 x i32]
store [1 x i32] [i32 0], [1 x i32]* %tmp

@ -12,7 +12,7 @@
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[0].X
define void @ngroups_x (i32 addrspace(1)* %out) {
define amdgpu_kernel void @ngroups_x (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.ngroups.x() #0
store i32 %0, i32 addrspace(1)* %out
@ -27,7 +27,7 @@ entry:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y
define void @ngroups_y (i32 addrspace(1)* %out) {
define amdgpu_kernel void @ngroups_y (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.ngroups.y() #0
store i32 %0, i32 addrspace(1)* %out
@ -42,7 +42,7 @@ entry:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z
define void @ngroups_z (i32 addrspace(1)* %out) {
define amdgpu_kernel void @ngroups_z (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.ngroups.z() #0
store i32 %0, i32 addrspace(1)* %out
@ -57,7 +57,7 @@ entry:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[0].W
define void @global_size_x (i32 addrspace(1)* %out) {
define amdgpu_kernel void @global_size_x (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.global.size.x() #0
store i32 %0, i32 addrspace(1)* %out
@ -72,7 +72,7 @@ entry:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[1].X
define void @global_size_y (i32 addrspace(1)* %out) {
define amdgpu_kernel void @global_size_y (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.global.size.y() #0
store i32 %0, i32 addrspace(1)* %out
@ -87,7 +87,7 @@ entry:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y
define void @global_size_z (i32 addrspace(1)* %out) {
define amdgpu_kernel void @global_size_z (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.global.size.z() #0
store i32 %0, i32 addrspace(1)* %out
@ -102,7 +102,7 @@ entry:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[1].Z
define void @local_size_x (i32 addrspace(1)* %out) {
define amdgpu_kernel void @local_size_x (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.local.size.x() #0
store i32 %0, i32 addrspace(1)* %out
@ -117,7 +117,7 @@ entry:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[1].W
define void @local_size_y (i32 addrspace(1)* %out) {
define amdgpu_kernel void @local_size_y (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.local.size.y() #0
store i32 %0, i32 addrspace(1)* %out
@ -132,7 +132,7 @@ entry:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[2].X
define void @local_size_z (i32 addrspace(1)* %out) {
define amdgpu_kernel void @local_size_z (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.local.size.z() #0
store i32 %0, i32 addrspace(1)* %out
@ -153,7 +153,7 @@ entry:
; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
define void @tgid_x_legacy(i32 addrspace(1)* %out) {
define amdgpu_kernel void @tgid_x_legacy(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.x() #0
store i32 %0, i32 addrspace(1)* %out
@ -165,7 +165,7 @@ entry:
; GCN-NOHSA: buffer_store_dword [[VVAL]]
; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
define void @tgid_y_legacy(i32 addrspace(1)* %out) {
define amdgpu_kernel void @tgid_y_legacy(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.y() #0
store i32 %0, i32 addrspace(1)* %out
@ -181,7 +181,7 @@ entry:
; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
define void @tgid_z_legacy(i32 addrspace(1)* %out) {
define amdgpu_kernel void @tgid_z_legacy(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.z() #0
store i32 %0, i32 addrspace(1)* %out
@ -194,7 +194,7 @@ entry:
; FUNC-LABEL: {{^}}tidig_x_legacy:
; GCN-NOHSA: buffer_store_dword v0
define void @tidig_x_legacy(i32 addrspace(1)* %out) {
define amdgpu_kernel void @tidig_x_legacy(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.x() #0
store i32 %0, i32 addrspace(1)* %out
@ -208,7 +208,7 @@ entry:
; FUNC-LABEL: {{^}}tidig_y_legacy:
; GCN-NOHSA: buffer_store_dword v1
define void @tidig_y_legacy(i32 addrspace(1)* %out) {
define amdgpu_kernel void @tidig_y_legacy(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.y() #0
store i32 %0, i32 addrspace(1)* %out
@ -221,7 +221,7 @@ entry:
; FUNC-LABEL: {{^}}tidig_z_legacy:
; GCN-NOHSA: buffer_store_dword v2
define void @tidig_z_legacy(i32 addrspace(1)* %out) {
define amdgpu_kernel void @tidig_z_legacy(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.z() #0
store i32 %0, i32 addrspace(1)* %out

@ -4,7 +4,7 @@
; FUNC-LABEL: {{^}}v_and_i64_br:
; SI: v_and_b32
; SI: v_and_b32
define void @v_and_i64_br(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
define amdgpu_kernel void @v_and_i64_br(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
entry:
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0

@ -11,7 +11,7 @@ declare i32 @llvm.r600.read.tidig.x() #0
; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
define amdgpu_kernel void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
%a = load <2 x i32>, <2 x i32> addrspace(1) * %in
%b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
@ -31,7 +31,7 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
define amdgpu_kernel void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
%a = load <4 x i32>, <4 x i32> addrspace(1) * %in
%b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
@ -42,7 +42,7 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
; FUNC-LABEL: {{^}}s_and_i32:
; SI: s_and_b32
define void @s_and_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
define amdgpu_kernel void @s_and_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
%and = and i32 %a, %b
store i32 %and, i32 addrspace(1)* %out, align 4
ret void
@ -50,7 +50,7 @@ define void @s_and_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
; FUNC-LABEL: {{^}}s_and_constant_i32:
; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x12d687
define void @s_and_constant_i32(i32 addrspace(1)* %out, i32 %a) {
define amdgpu_kernel void @s_and_constant_i32(i32 addrspace(1)* %out, i32 %a) {
%and = and i32 %a, 1234567
store i32 %and, i32 addrspace(1)* %out, align 4
ret void
@ -66,7 +66,7 @@ define void @s_and_constant_i32(i32 addrspace(1)* %out, i32 %a) {
; SI-DAG: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, [[K]]
; SI-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], [[K]]
; SI: buffer_store_dword [[VK]]
define void @s_and_multi_use_constant_i32_0(i32 addrspace(1)* %out, i32 %a, i32 %b) {
define amdgpu_kernel void @s_and_multi_use_constant_i32_0(i32 addrspace(1)* %out, i32 %a, i32 %b) {
%and = and i32 %a, 1234567
; Just to stop future replacement of copy to vgpr + store with VALU op.
@ -83,7 +83,7 @@ define void @s_and_multi_use_constant_i32_0(i32 addrspace(1)* %out, i32 %a, i32
; SI: s_add_i32
; SI: s_add_i32 [[ADD:s[0-9]+]], s{{[0-9]+}}, [[K]]
; SI: buffer_store_dword [[VK]]
define void @s_and_multi_use_constant_i32_1(i32 addrspace(1)* %out, i32 %a, i32 %b) {
define amdgpu_kernel void @s_and_multi_use_constant_i32_1(i32 addrspace(1)* %out, i32 %a, i32 %b) {
%and = and i32 %a, 1234567
%foo = add i32 %and, 1234567
%bar = add i32 %foo, %b
@ -93,7 +93,7 @@ define void @s_and_multi_use_constant_i32_1(i32 addrspace(1)* %out, i32 %a, i32
; FUNC-LABEL: {{^}}v_and_i32_vgpr_vgpr:
; SI: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @v_and_i32_vgpr_vgpr(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) {
define amdgpu_kernel void @v_and_i32_vgpr_vgpr(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) {
%tid = call i32 @llvm.r600.read.tidig.x() #0
%gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
%gep.b = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
@ -109,7 +109,7 @@ define void @v_and_i32_vgpr_vgpr(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr
; SI-DAG: s_load_dword [[SA:s[0-9]+]]
; SI-DAG: {{buffer|flat}}_load_dword [[VB:v[0-9]+]]
; SI: v_and_b32_e32 v{{[0-9]+}}, [[SA]], [[VB]]
define void @v_and_i32_sgpr_vgpr(i32 addrspace(1)* %out, i32 %a, i32 addrspace(1)* %bptr) {
define amdgpu_kernel void @v_and_i32_sgpr_vgpr(i32 addrspace(1)* %out, i32 %a, i32 addrspace(1)* %bptr) {
%tid = call i32 @llvm.r600.read.tidig.x() #0
%gep.b = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -123,7 +123,7 @@ define void @v_and_i32_sgpr_vgpr(i32 addrspace(1)* %out, i32 %a, i32 addrspace(1
; SI-DAG: s_load_dword [[SA:s[0-9]+]]
; SI-DAG: {{buffer|flat}}_load_dword [[VB:v[0-9]+]]
; SI: v_and_b32_e32 v{{[0-9]+}}, [[SA]], [[VB]]
define void @v_and_i32_vgpr_sgpr(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 %b) {
define amdgpu_kernel void @v_and_i32_vgpr_sgpr(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 %b) {
%tid = call i32 @llvm.r600.read.tidig.x() #0
%gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -135,7 +135,7 @@ define void @v_and_i32_vgpr_sgpr(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr
; FUNC-LABEL: {{^}}v_and_constant_i32
; SI: v_and_b32_e32 v{{[0-9]+}}, 0x12d687, v{{[0-9]+}}
define void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
define amdgpu_kernel void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
%a = load i32, i32 addrspace(1)* %aptr, align 4
%and = and i32 %a, 1234567
store i32 %and, i32 addrspace(1)* %out, align 4
@ -144,7 +144,7 @@ define void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr)
; FUNC-LABEL: {{^}}v_and_inline_imm_64_i32
; SI: v_and_b32_e32 v{{[0-9]+}}, 64, v{{[0-9]+}}
define void @v_and_inline_imm_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
define amdgpu_kernel void @v_and_inline_imm_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
%a = load i32, i32 addrspace(1)* %aptr, align 4
%and = and i32 %a, 64
store i32 %and, i32 addrspace(1)* %out, align 4
@ -153,7 +153,7 @@ define void @v_and_inline_imm_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %
; FUNC-LABEL: {{^}}v_and_inline_imm_neg_16_i32
; SI: v_and_b32_e32 v{{[0-9]+}}, -16, v{{[0-9]+}}
define void @v_and_inline_imm_neg_16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
define amdgpu_kernel void @v_and_inline_imm_neg_16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
%a = load i32, i32 addrspace(1)* %aptr, align 4
%and = and i32 %a, -16
store i32 %and, i32 addrspace(1)* %out, align 4
@ -162,7 +162,7 @@ define void @v_and_inline_imm_neg_16_i32(i32 addrspace(1)* %out, i32 addrspace(1
; FUNC-LABEL: {{^}}s_and_i64
; SI: s_and_b64
define void @s_and_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
define amdgpu_kernel void @s_and_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
%and = and i64 %a, %b
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
@ -171,7 +171,7 @@ define void @s_and_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
; FIXME: Should use SGPRs
; FUNC-LABEL: {{^}}s_and_i1:
; SI: v_and_b32
define void @s_and_i1(i1 addrspace(1)* %out, i1 %a, i1 %b) {
define amdgpu_kernel void @s_and_i1(i1 addrspace(1)* %out, i1 %a, i1 %b) {
%and = and i1 %a, %b
store i1 %and, i1 addrspace(1)* %out
ret void
@ -181,7 +181,7 @@ define void @s_and_i1(i1 addrspace(1)* %out, i1 %a, i1 %b) {
; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000{{$}}
; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80{{$}}
; SI: buffer_store_dwordx2
define void @s_and_constant_i64(i64 addrspace(1)* %out, i64 %a) {
define amdgpu_kernel void @s_and_constant_i64(i64 addrspace(1)* %out, i64 %a) {
%and = and i64 %a, 549756338176
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
@ -191,7 +191,7 @@ define void @s_and_constant_i64(i64 addrspace(1)* %out, i64 %a) {
; XSI-DAG: s_mov_b32 s[[KLO:[0-9]+]], 0x80000{{$}}
; XSI-DAG: s_mov_b32 s[[KHI:[0-9]+]], 0x80{{$}}
; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[KLO]]:[[KHI]]{{\]}}
define void @s_and_multi_use_constant_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
define amdgpu_kernel void @s_and_multi_use_constant_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
%and0 = and i64 %a, 549756338176
%and1 = and i64 %b, 549756338176
store volatile i64 %and0, i64 addrspace(1)* %out
@ -205,7 +205,7 @@ define void @s_and_multi_use_constant_i64(i64 addrspace(1)* %out, i64 %a, i64 %b
; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x12d687{{$}}
; SI-NOT: and
; SI: buffer_store_dwordx2
define void @s_and_32_bit_constant_i64(i64 addrspace(1)* %out, i64 %a) {
define amdgpu_kernel void @s_and_32_bit_constant_i64(i64 addrspace(1)* %out, i64 %a) {
%and = and i64 %a, 1234567
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
@ -223,7 +223,7 @@ define void @s_and_32_bit_constant_i64(i64 addrspace(1)* %out, i64 %a) {
; SI: s_and_b32 s{{[0-9]+}}, [[B]], 62
; SI-NOT: and
; SI: buffer_store_dwordx2
define void @s_and_multi_use_inline_imm_i64(i64 addrspace(1)* %out, i64 %a, i64 %b, i64 %c) {
define amdgpu_kernel void @s_and_multi_use_inline_imm_i64(i64 addrspace(1)* %out, i64 %a, i64 %b, i64 %c) {
%shl.a = shl i64 %a, 1
%shl.b = shl i64 %b, 1
%and0 = and i64 %shl.a, 62
@ -238,7 +238,7 @@ define void @s_and_multi_use_inline_imm_i64(i64 addrspace(1)* %out, i64 %a, i64
; FUNC-LABEL: {{^}}v_and_i64:
; SI: v_and_b32
; SI: v_and_b32
define void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
define amdgpu_kernel void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
%a = load i64, i64 addrspace(1)* %aptr, align 8
%b = load i64, i64 addrspace(1)* %bptr, align 8
%and = and i64 %a, %b
@ -250,7 +250,7 @@ define void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addr
; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, 0xab19b207, {{v[0-9]+}}
; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, 0x11e, {{v[0-9]+}}
; SI: buffer_store_dwordx2
define void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
define amdgpu_kernel void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
%a = load i64, i64 addrspace(1)* %aptr, align 8
%and = and i64 %a, 1231231234567
store i64 %and, i64 addrspace(1)* %out, align 8
@ -268,7 +268,7 @@ define void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr)
; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, [[KHI]], v[[HI1]]
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
define void @v_and_multi_use_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
define amdgpu_kernel void @v_and_multi_use_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
%a = load volatile i64, i64 addrspace(1)* %aptr
%b = load volatile i64, i64 addrspace(1)* %aptr
%and0 = and i64 %a, 1231231234567
@ -288,7 +288,7 @@ define void @v_and_multi_use_constant_i64(i64 addrspace(1)* %out, i64 addrspace(
; SI-NOT: and
; SI: buffer_store_dwordx2 v{{\[}}[[RESLO0]]
; SI: buffer_store_dwordx2 v{{\[}}[[RESLO1]]
define void @v_and_multi_use_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
define amdgpu_kernel void @v_and_multi_use_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
%a = load volatile i64, i64 addrspace(1)* %aptr
%b = load volatile i64, i64 addrspace(1)* %aptr
%and0 = and i64 %a, 63
@ -304,7 +304,7 @@ define void @v_and_multi_use_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspac
; SI: v_and_b32_e32 {{v[0-9]+}}, 0x12d687, [[VAL]]
; SI-NOT: and
; SI: buffer_store_dwordx2
define void @v_and_i64_32_bit_constant(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
define amdgpu_kernel void @v_and_i64_32_bit_constant(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
%a = load i64, i64 addrspace(1)* %aptr, align 8
%and = and i64 %a, 1234567
store i64 %and, i64 addrspace(1)* %out, align 8
@ -317,7 +317,7 @@ define void @v_and_i64_32_bit_constant(i64 addrspace(1)* %out, i64 addrspace(1)*
; SI: v_and_b32_e32 {{v[0-9]+}}, 64, {{v[0-9]+}}
; SI-NOT: and
; SI: buffer_store_dwordx2
define void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
define amdgpu_kernel void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
%a = load i64, i64 addrspace(1)* %aptr, align 8
%and = and i64 %a, 64
store i64 %and, i64 addrspace(1)* %out, align 8
@ -331,7 +331,7 @@ define void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %apt
; SI: v_and_b32_e32 v[[VAL_LO]], -8, v[[VAL_LO]]
; SI-NOT: and
; SI: buffer_store_dwordx2 v{{\[}}[[VAL_LO]]:[[VAL_HI]]{{\]}}
define void @v_and_inline_neg_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
define amdgpu_kernel void @v_and_inline_neg_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
%a = load i64, i64 addrspace(1)* %aptr, align 8
%and = and i64 %a, -8
store i64 %and, i64 addrspace(1)* %out, align 8
@ -344,7 +344,7 @@ define void @v_and_inline_neg_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)*
; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 64
; SI-NOT: and
; SI: buffer_store_dword
define void @s_and_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
define amdgpu_kernel void @s_and_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
%and = and i64 %a, 64
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
@ -358,7 +358,7 @@ define void @s_and_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %
; SI-NOT: and
; SI: s_add_u32
; SI-NEXT: s_addc_u32
define void @s_and_inline_imm_64_i64_noshrink(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a, i64 %b) {
define amdgpu_kernel void @s_and_inline_imm_64_i64_noshrink(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a, i64 %b) {
%shl = shl i64 %a, 1
%and = and i64 %shl, 64
%add = add i64 %and, %b
@ -372,7 +372,7 @@ define void @s_and_inline_imm_64_i64_noshrink(i64 addrspace(1)* %out, i64 addrsp
; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 1
; SI-NOT: and
; SI: buffer_store_dwordx2
define void @s_and_inline_imm_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
define amdgpu_kernel void @s_and_inline_imm_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
%and = and i64 %a, 1
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
@ -387,7 +387,7 @@ define void @s_and_inline_imm_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x3ff00000
; SI-NOT: and
; SI: buffer_store_dwordx2
define void @s_and_inline_imm_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
define amdgpu_kernel void @s_and_inline_imm_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
%and = and i64 %a, 4607182418800017408
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
@ -402,7 +402,7 @@ define void @s_and_inline_imm_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)*
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0xbff00000
; SI-NOT: and
; SI: buffer_store_dwordx2
define void @s_and_inline_imm_neg_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
define amdgpu_kernel void @s_and_inline_imm_neg_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
%and = and i64 %a, 13830554455654793216
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
@ -417,7 +417,7 @@ define void @s_and_inline_imm_neg_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x3fe00000
; SI-NOT: and
; SI: buffer_store_dwordx2
define void @s_and_inline_imm_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
define amdgpu_kernel void @s_and_inline_imm_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
%and = and i64 %a, 4602678819172646912
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
@ -432,7 +432,7 @@ define void @s_and_inline_imm_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)*
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0xbfe00000
; SI-NOT: and
; SI: buffer_store_dwordx2
define void @s_and_inline_imm_neg_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
define amdgpu_kernel void @s_and_inline_imm_neg_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
%and = and i64 %a, 13826050856027422720
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
@ -445,7 +445,7 @@ define void @s_and_inline_imm_neg_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 2.0
; SI-NOT: and
; SI: buffer_store_dwordx2
define void @s_and_inline_imm_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
define amdgpu_kernel void @s_and_inline_imm_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
%and = and i64 %a, 4611686018427387904
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
@ -458,7 +458,7 @@ define void @s_and_inline_imm_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)*
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, -2.0
; SI-NOT: and
; SI: buffer_store_dwordx2
define void @s_and_inline_imm_neg_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
define amdgpu_kernel void @s_and_inline_imm_neg_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
%and = and i64 %a, 13835058055282163712
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
@ -473,7 +473,7 @@ define void @s_and_inline_imm_neg_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x40100000
; SI-NOT: and
; SI: buffer_store_dwordx2
define void @s_and_inline_imm_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
define amdgpu_kernel void @s_and_inline_imm_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
%and = and i64 %a, 4616189618054758400
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
@ -488,7 +488,7 @@ define void @s_and_inline_imm_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)*
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0xc0100000
; SI-NOT: and
; SI: buffer_store_dwordx2
define void @s_and_inline_imm_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
define amdgpu_kernel void @s_and_inline_imm_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
%and = and i64 %a, 13839561654909534208
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
@ -505,7 +505,7 @@ define void @s_and_inline_imm_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(
; SI: s_and_b32 s[[K_HI:[0-9]+]], s{{[0-9]+}}, 4.0
; SI-NOT: and
; SI: buffer_store_dwordx2
define void @s_and_inline_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
define amdgpu_kernel void @s_and_inline_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
%and = and i64 %a, 1082130432
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
@ -518,7 +518,7 @@ define void @s_and_inline_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(
; SI: s_and_b32 s[[K_HI:[0-9]+]], s{{[0-9]+}}, -4.0
; SI-NOT: and
; SI: buffer_store_dwordx2
define void @s_and_inline_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
define amdgpu_kernel void @s_and_inline_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
%and = and i64 %a, -1065353216
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
@ -531,7 +531,7 @@ define void @s_and_inline_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrsp
; SI: s_and_b32 s[[K_HI:[0-9]+]], s{{[0-9]+}}, 4.0
; SI-NOT: and
; SI: buffer_store_dwordx2
define void @s_and_inline_high_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
define amdgpu_kernel void @s_and_inline_high_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
%and = and i64 %a, 4647714815446351872
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
@ -544,7 +544,7 @@ define void @s_and_inline_high_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrs
; SI: s_and_b32 s[[K_HI:[0-9]+]], s{{[0-9]+}}, -4.0
; SI-NOT: and
; SI: buffer_store_dwordx2
define void @s_and_inline_high_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
define amdgpu_kernel void @s_and_inline_high_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
%and = and i64 %a, 13871086852301127680
store i64 %and, i64 addrspace(1)* %out, align 8
ret void

@ -11,22 +11,22 @@ declare i32 @llvm.amdgcn.workitem.id.z() #0
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
declare i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
; HSA: define void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
define void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
; HSA: define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
%val = call i32 @llvm.amdgcn.workgroup.id.x()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
; HSA: define void @use_tgid_y(i32 addrspace(1)* %ptr) #2 {
define void @use_tgid_y(i32 addrspace(1)* %ptr) #1 {
; HSA: define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #1 {
%val = call i32 @llvm.amdgcn.workgroup.id.y()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
; HSA: define void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #2 {
define void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 {
; HSA: define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 {
%val0 = call i32 @llvm.amdgcn.workgroup.id.y()
store volatile i32 %val0, i32 addrspace(1)* %ptr
%val1 = call i32 @llvm.amdgcn.workgroup.id.y()
@ -34,8 +34,8 @@ define void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 {
ret void
}
; HSA: define void @use_tgid_x_y(i32 addrspace(1)* %ptr) #2 {
define void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 {
; HSA: define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 {
%val0 = call i32 @llvm.amdgcn.workgroup.id.x()
%val1 = call i32 @llvm.amdgcn.workgroup.id.y()
store volatile i32 %val0, i32 addrspace(1)* %ptr
@ -43,15 +43,15 @@ define void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 {
ret void
}
; HSA: define void @use_tgid_z(i32 addrspace(1)* %ptr) #3 {
define void @use_tgid_z(i32 addrspace(1)* %ptr) #1 {
; HSA: define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #3 {
define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #1 {
%val = call i32 @llvm.amdgcn.workgroup.id.z()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
; HSA: define void @use_tgid_x_z(i32 addrspace(1)* %ptr) #3 {
define void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 {
; HSA: define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #3 {
define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 {
%val0 = call i32 @llvm.amdgcn.workgroup.id.x()
%val1 = call i32 @llvm.amdgcn.workgroup.id.z()
store volatile i32 %val0, i32 addrspace(1)* %ptr
@ -59,8 +59,8 @@ define void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 {
ret void
}
; HSA: define void @use_tgid_y_z(i32 addrspace(1)* %ptr) #4 {
define void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 {
; HSA: define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #4 {
define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 {
%val0 = call i32 @llvm.amdgcn.workgroup.id.y()
%val1 = call i32 @llvm.amdgcn.workgroup.id.z()
store volatile i32 %val0, i32 addrspace(1)* %ptr
@ -68,8 +68,8 @@ define void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 {
ret void
}
; HSA: define void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #4 {
define void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 {
; HSA: define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #4 {
define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 {
%val0 = call i32 @llvm.amdgcn.workgroup.id.x()
%val1 = call i32 @llvm.amdgcn.workgroup.id.y()
%val2 = call i32 @llvm.amdgcn.workgroup.id.z()
@ -79,29 +79,29 @@ define void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 {
ret void
}
; HSA: define void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
define void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
; HSA: define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
%val = call i32 @llvm.amdgcn.workitem.id.x()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
; HSA: define void @use_tidig_y(i32 addrspace(1)* %ptr) #5 {
define void @use_tidig_y(i32 addrspace(1)* %ptr) #1 {
; HSA: define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #5 {
define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #1 {
%val = call i32 @llvm.amdgcn.workitem.id.y()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
; HSA: define void @use_tidig_z(i32 addrspace(1)* %ptr) #6 {
define void @use_tidig_z(i32 addrspace(1)* %ptr) #1 {
; HSA: define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #6 {
define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #1 {
%val = call i32 @llvm.amdgcn.workitem.id.z()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
; HSA: define void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
define void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
; HSA: define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
%val0 = call i32 @llvm.amdgcn.workitem.id.x()
%val1 = call i32 @llvm.amdgcn.workgroup.id.x()
store volatile i32 %val0, i32 addrspace(1)* %ptr
@ -109,8 +109,8 @@ define void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
ret void
}
; HSA: define void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #7 {
define void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 {
; HSA: define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #7 {
define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 {
%val0 = call i32 @llvm.amdgcn.workitem.id.y()
%val1 = call i32 @llvm.amdgcn.workgroup.id.y()
store volatile i32 %val0, i32 addrspace(1)* %ptr
@ -118,8 +118,8 @@ define void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 {
ret void
}
; HSA: define void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #8 {
define void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 {
; HSA: define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #8 {
define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 {
%val0 = call i32 @llvm.amdgcn.workitem.id.x()
%val1 = call i32 @llvm.amdgcn.workitem.id.y()
%val2 = call i32 @llvm.amdgcn.workitem.id.z()
@ -129,8 +129,8 @@ define void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 {
ret void
}
; HSA: define void @use_all_workitems(i32 addrspace(1)* %ptr) #9 {
define void @use_all_workitems(i32 addrspace(1)* %ptr) #1 {
; HSA: define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #9 {
define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #1 {
%val0 = call i32 @llvm.amdgcn.workitem.id.x()
%val1 = call i32 @llvm.amdgcn.workitem.id.y()
%val2 = call i32 @llvm.amdgcn.workitem.id.z()
@ -146,8 +146,8 @@ define void @use_all_workitems(i32 addrspace(1)* %ptr) #1 {
ret void
}
; HSA: define void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #10 {
define void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 {
; HSA: define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #10 {
define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 {
%dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
%bc = bitcast i8 addrspace(2)* %dispatch.ptr to i32 addrspace(2)*
%val = load i32, i32 addrspace(2)* %bc
@ -155,8 +155,8 @@ define void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 {
ret void
}
; HSA: define void @use_queue_ptr(i32 addrspace(1)* %ptr) #11 {
define void @use_queue_ptr(i32 addrspace(1)* %ptr) #1 {
; HSA: define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #11 {
define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #1 {
%dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.queue.ptr()
%bc = bitcast i8 addrspace(2)* %dispatch.ptr to i32 addrspace(2)*
%val = load i32, i32 addrspace(2)* %bc
@ -164,58 +164,58 @@ define void @use_queue_ptr(i32 addrspace(1)* %ptr) #1 {
ret void
}
; HSA: define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #11 {
define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 {
; HSA: define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #11 {
define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 {
%stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
store volatile i32 0, i32 addrspace(4)* %stof
ret void
}
; HSA: define void @use_private_to_flat_addrspacecast(i32* %ptr) #11 {
define void @use_private_to_flat_addrspacecast(i32* %ptr) #1 {
; HSA: define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32* %ptr) #11 {
define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32* %ptr) #1 {
%stof = addrspacecast i32* %ptr to i32 addrspace(4)*
store volatile i32 0, i32 addrspace(4)* %stof
ret void
}
; HSA: define void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #1 {
define void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #1 {
; HSA: define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #1 {
%ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(3)*
store volatile i32 0, i32 addrspace(3)* %ftos
ret void
}
; HSA: define void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #1 {
define void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #1 {
; HSA: define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #1 {
%ftos = addrspacecast i32 addrspace(4)* %ptr to i32*
store volatile i32 0, i32* %ftos
ret void
}
; No-op addrspacecast should not use queue ptr
; HSA: define void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
define void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
; HSA: define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
%stof = addrspacecast i32 addrspace(1)* %ptr to i32 addrspace(4)*
store volatile i32 0, i32 addrspace(4)* %stof
ret void
}
; HSA: define void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #1 {
define void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #1 {
; HSA: define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #1 {
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #1 {
%stof = addrspacecast i32 addrspace(2)* %ptr to i32 addrspace(4)*
%ld = load volatile i32, i32 addrspace(4)* %stof
ret void
}
; HSA: define void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #1 {
define void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #1 {
; HSA: define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #1 {
%ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(1)*
store volatile i32 0, i32 addrspace(1)* %ftos
ret void
}
; HSA: define void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #1 {
define void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #1 {
; HSA: define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #1 {
%ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(2)*
%ld = load volatile i32, i32 addrspace(2)* %ftos
ret void

@ -12,22 +12,22 @@ declare i32 @llvm.r600.read.local.size.x() #0
declare i32 @llvm.r600.read.local.size.y() #0
declare i32 @llvm.r600.read.local.size.z() #0
; ALL: define void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
define void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
; ALL: define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
%val = call i32 @llvm.r600.read.tgid.x()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
; ALL: define void @use_tgid_y(i32 addrspace(1)* %ptr) #2 {
define void @use_tgid_y(i32 addrspace(1)* %ptr) #1 {
; ALL: define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #1 {
%val = call i32 @llvm.r600.read.tgid.y()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
; ALL: define void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #2 {
define void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 {
; ALL: define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 {
%val0 = call i32 @llvm.r600.read.tgid.y()
store volatile i32 %val0, i32 addrspace(1)* %ptr
%val1 = call i32 @llvm.r600.read.tgid.y()
@ -35,8 +35,8 @@ define void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 {
ret void
}
; ALL: define void @use_tgid_x_y(i32 addrspace(1)* %ptr) #2 {
define void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 {
; ALL: define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 {
%val0 = call i32 @llvm.r600.read.tgid.x()
%val1 = call i32 @llvm.r600.read.tgid.y()
store volatile i32 %val0, i32 addrspace(1)* %ptr
@ -44,15 +44,15 @@ define void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 {
ret void
}
; ALL: define void @use_tgid_z(i32 addrspace(1)* %ptr) #3 {
define void @use_tgid_z(i32 addrspace(1)* %ptr) #1 {
; ALL: define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #3 {
define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #1 {
%val = call i32 @llvm.r600.read.tgid.z()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
; ALL: define void @use_tgid_x_z(i32 addrspace(1)* %ptr) #3 {
define void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 {
; ALL: define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #3 {
define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 {
%val0 = call i32 @llvm.r600.read.tgid.x()
%val1 = call i32 @llvm.r600.read.tgid.z()
store volatile i32 %val0, i32 addrspace(1)* %ptr
@ -60,8 +60,8 @@ define void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 {
ret void
}
; ALL: define void @use_tgid_y_z(i32 addrspace(1)* %ptr) #4 {
define void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 {
; ALL: define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #4 {
define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 {
%val0 = call i32 @llvm.r600.read.tgid.y()
%val1 = call i32 @llvm.r600.read.tgid.z()
store volatile i32 %val0, i32 addrspace(1)* %ptr
@ -69,8 +69,8 @@ define void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 {
ret void
}
; ALL: define void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #4 {
define void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 {
; ALL: define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #4 {
define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 {
%val0 = call i32 @llvm.r600.read.tgid.x()
%val1 = call i32 @llvm.r600.read.tgid.y()
%val2 = call i32 @llvm.r600.read.tgid.z()
@ -80,29 +80,29 @@ define void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 {
ret void
}
; ALL: define void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
define void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
; ALL: define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
%val = call i32 @llvm.r600.read.tidig.x()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
; ALL: define void @use_tidig_y(i32 addrspace(1)* %ptr) #5 {
define void @use_tidig_y(i32 addrspace(1)* %ptr) #1 {
; ALL: define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #5 {
define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #1 {
%val = call i32 @llvm.r600.read.tidig.y()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
; ALL: define void @use_tidig_z(i32 addrspace(1)* %ptr) #6 {
define void @use_tidig_z(i32 addrspace(1)* %ptr) #1 {
; ALL: define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #6 {
define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #1 {
%val = call i32 @llvm.r600.read.tidig.z()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
; ALL: define void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
define void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
; ALL: define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
%val0 = call i32 @llvm.r600.read.tidig.x()
%val1 = call i32 @llvm.r600.read.tgid.x()
store volatile i32 %val0, i32 addrspace(1)* %ptr
@ -110,8 +110,8 @@ define void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
ret void
}
; ALL: define void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #7 {
define void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 {
; ALL: define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #7 {
define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 {
%val0 = call i32 @llvm.r600.read.tidig.y()
%val1 = call i32 @llvm.r600.read.tgid.y()
store volatile i32 %val0, i32 addrspace(1)* %ptr
@ -119,8 +119,8 @@ define void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 {
ret void
}
; ALL: define void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #8 {
define void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 {
; ALL: define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #8 {
define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 {
%val0 = call i32 @llvm.r600.read.tidig.x()
%val1 = call i32 @llvm.r600.read.tidig.y()
%val2 = call i32 @llvm.r600.read.tidig.z()
@ -130,8 +130,8 @@ define void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 {
ret void
}
; ALL: define void @use_all_workitems(i32 addrspace(1)* %ptr) #9 {
define void @use_all_workitems(i32 addrspace(1)* %ptr) #1 {
; ALL: define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #9 {
define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #1 {
%val0 = call i32 @llvm.r600.read.tidig.x()
%val1 = call i32 @llvm.r600.read.tidig.y()
%val2 = call i32 @llvm.r600.read.tidig.z()
@ -147,25 +147,25 @@ define void @use_all_workitems(i32 addrspace(1)* %ptr) #1 {
ret void
}
; HSA: define void @use_get_local_size_x(i32 addrspace(1)* %ptr) #10 {
; NOHSA: define void @use_get_local_size_x(i32 addrspace(1)* %ptr) #1 {
define void @use_get_local_size_x(i32 addrspace(1)* %ptr) #1 {
; HSA: define amdgpu_kernel void @use_get_local_size_x(i32 addrspace(1)* %ptr) #10 {
; NOHSA: define amdgpu_kernel void @use_get_local_size_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_get_local_size_x(i32 addrspace(1)* %ptr) #1 {
%val = call i32 @llvm.r600.read.local.size.x()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
; HSA: define void @use_get_local_size_y(i32 addrspace(1)* %ptr) #10 {
; NOHSA: define void @use_get_local_size_y(i32 addrspace(1)* %ptr) #1 {
define void @use_get_local_size_y(i32 addrspace(1)* %ptr) #1 {
; HSA: define amdgpu_kernel void @use_get_local_size_y(i32 addrspace(1)* %ptr) #10 {
; NOHSA: define amdgpu_kernel void @use_get_local_size_y(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_get_local_size_y(i32 addrspace(1)* %ptr) #1 {
%val = call i32 @llvm.r600.read.local.size.y()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
; HSA: define void @use_get_local_size_z(i32 addrspace(1)* %ptr) #10 {
; NOHSA: define void @use_get_local_size_z(i32 addrspace(1)* %ptr) #1 {
define void @use_get_local_size_z(i32 addrspace(1)* %ptr) #1 {
; HSA: define amdgpu_kernel void @use_get_local_size_z(i32 addrspace(1)* %ptr) #10 {
; NOHSA: define amdgpu_kernel void @use_get_local_size_z(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_get_local_size_z(i32 addrspace(1)* %ptr) #1 {
%val = call i32 @llvm.r600.read.local.size.z()
store i32 %val, i32 addrspace(1)* %ptr
ret void

@ -6,13 +6,13 @@
; CHECK-LABEL: {{^}}test:
; CHECK: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, __unnamed_1
; CHECK: s_endpgm
define void @test() {
define amdgpu_kernel void @test() {
store i32 1, i32 addrspace(1)* @0
ret void
}
; CHECK-LABEL: {{^}}__unnamed_2:
; CHECK: s_endpgm
define void @1() {
define amdgpu_kernel void @1() {
ret void
}

@ -6,7 +6,7 @@ declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
; GCN-LABEL: {{^}}anyext_i1_i32:
; GCN: v_cndmask_b32_e64
define void @anyext_i1_i32(i32 addrspace(1)* %out, i32 %cond) {
define amdgpu_kernel void @anyext_i1_i32(i32 addrspace(1)* %out, i32 %cond) {
entry:
%tmp = icmp eq i32 %cond, 0
%tmp1 = zext i1 %tmp to i8
@ -22,7 +22,7 @@ entry:
; VI: v_xor_b32_e32 [[XOR:v[0-9]+]], -1, [[ADD]]
; VI: v_and_b32_e32 [[AND:v[0-9]+]], 1, [[XOR]]
; VI: buffer_store_dword [[AND]]
define void @s_anyext_i16_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %a, i16 addrspace(1)* %b) {
define amdgpu_kernel void @s_anyext_i16_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %a, i16 addrspace(1)* %b) {
entry:
%tid.x = call i32 @llvm.amdgcn.workitem.id.x()
%tid.y = call i32 @llvm.amdgcn.workitem.id.y()

@ -24,7 +24,7 @@ declare void @llvm.amdgcn.s.barrier() #2
; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 64
; SI-PROMOTE: ds_write_b32 [[PTRREG]]
define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) #0 {
define amdgpu_kernel void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) #0 {
%alloca = alloca [16 x i32], align 16
%mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0);
%tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbcnt.lo)

@ -7,7 +7,7 @@ declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0
; SI-DAG: v_mul_lo_i32
; SI-DAG: v_mul_hi_i32
; SI: s_endpgm
define void @test_array_ptr_calc(i32 addrspace(1)* noalias %out, [1025 x i32] addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
define amdgpu_kernel void @test_array_ptr_calc(i32 addrspace(1)* noalias %out, [1025 x i32] addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
%mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
%tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbcnt.lo)
%a_ptr = getelementptr [1025 x i32], [1025 x i32] addrspace(1)* %inA, i32 %tid, i32 0

@ -13,7 +13,7 @@
; CIVI: v_ashrrev_i32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; CIVI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
; CIVI: v_or_b32_e32
define void @s_ashr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %lhs, <2 x i16> %rhs) #0 {
define amdgpu_kernel void @s_ashr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %lhs, <2 x i16> %rhs) #0 {
%result = ashr <2 x i16> %lhs, %rhs
store <2 x i16> %result, <2 x i16> addrspace(1)* %out
ret void
@ -40,7 +40,7 @@ define void @s_ashr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %lhs, <2 x i16
; CI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
; CI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @v_ashr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
define amdgpu_kernel void @v_ashr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
@ -57,7 +57,7 @@ define void @v_ashr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)*
; GFX9: s_load_dword [[RHS:s[0-9]+]]
; GFX9: {{buffer|flat}}_load_dword [[LHS:v[0-9]+]]
; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], [[RHS]], [[LHS]]
define void @ashr_v_s_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 {
define amdgpu_kernel void @ashr_v_s_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
@ -72,7 +72,7 @@ define void @ashr_v_s_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)
; GFX9: s_load_dword [[LHS:s[0-9]+]]
; GFX9: {{buffer|flat}}_load_dword [[RHS:v[0-9]+]]
; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], [[RHS]], [[LHS]]
define void @ashr_s_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 {
define amdgpu_kernel void @ashr_s_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
@ -86,7 +86,7 @@ define void @ashr_s_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)
; GCN-LABEL: {{^}}ashr_imm_v_v2i16:
; GCN: {{buffer|flat}}_load_dword [[RHS:v[0-9]+]]
; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], [[RHS]], -4
define void @ashr_imm_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
define amdgpu_kernel void @ashr_imm_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
@ -100,7 +100,7 @@ define void @ashr_imm_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(
; GCN-LABEL: {{^}}ashr_v_imm_v2i16:
; GCN: {{buffer|flat}}_load_dword [[LHS:v[0-9]+]]
; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], 8, [[LHS]]
define void @ashr_v_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
define amdgpu_kernel void @ashr_v_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
@ -117,7 +117,7 @@ define void @ashr_v_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(
; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: {{buffer|flat}}_store_dwordx2
define void @v_ashr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
define amdgpu_kernel void @v_ashr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %in, i64 %tid.ext
@ -135,7 +135,7 @@ define void @v_ashr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)*
; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, 8, v{{[0-9]+}}
; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, 8, v{{[0-9]+}}
; GCN: {{buffer|flat}}_store_dwordx2
define void @ashr_v_imm_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
define amdgpu_kernel void @ashr_v_imm_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %in, i64 %tid.ext
@ -12,7 +12,7 @@
; GCN-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
; GCN: ds_cmpst_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]] offset:16
; GCN: s_endpgm
define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap) nounwind {
define amdgpu_kernel void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap) nounwind {
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
%result = extractvalue { i32, i1 } %pair, 0
@ -33,7 +33,7 @@ define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrs
; GCN: ds_cmpst_rtn_b64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr, i64 %swap) nounwind {
define amdgpu_kernel void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr, i64 %swap) nounwind {
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%pair = cmpxchg i64 addrspace(3)* %gep, i64 7, i64 %swap seq_cst monotonic
%result = extractvalue { i64, i1 } %pair, 0
@ -45,7 +45,7 @@ define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrs
; SI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; CIVI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap, i32 %a, i32 %b) nounwind {
define amdgpu_kernel void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap, i32 %a, i32 %b) nounwind {
%sub = sub i32 %a, %b
%add = add i32 %sub, 4
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
@ -65,7 +65,7 @@ define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i3
; GCN-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
; GCN: ds_cmpst_b32 [[VPTR]], [[VCMP]], [[VSWAP]] offset:16
; GCN: s_endpgm
define void @lds_atomic_cmpxchg_noret_i32_offset(i32 addrspace(3)* %ptr, i32 %swap) nounwind {
define amdgpu_kernel void @lds_atomic_cmpxchg_noret_i32_offset(i32 addrspace(3)* %ptr, i32 %swap) nounwind {
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
%result = extractvalue { i32, i1 } %pair, 0
@ -84,7 +84,7 @@ define void @lds_atomic_cmpxchg_noret_i32_offset(i32 addrspace(3)* %ptr, i32 %sw
; GCN-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
; GCN: ds_cmpst_b64 [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32
; GCN: s_endpgm
define void @lds_atomic_cmpxchg_noret_i64_offset(i64 addrspace(3)* %ptr, i64 %swap) nounwind {
define amdgpu_kernel void @lds_atomic_cmpxchg_noret_i64_offset(i64 addrspace(3)* %ptr, i64 %swap) nounwind {
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%pair = cmpxchg i64 addrspace(3)* %gep, i64 7, i64 %swap seq_cst monotonic
%result = extractvalue { i64, i1 } %pair, 0
@ -5,7 +5,7 @@
; FUNC-LABEL: {{^}}atomic_add_local:
; R600: LDS_ADD *
; SI: ds_add_u32
define void @atomic_add_local(i32 addrspace(3)* %local) {
define amdgpu_kernel void @atomic_add_local(i32 addrspace(3)* %local) {
%unused = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst
ret void
}
@ -13,7 +13,7 @@ define void @atomic_add_local(i32 addrspace(3)* %local) {
; FUNC-LABEL: {{^}}atomic_add_local_const_offset:
; R600: LDS_ADD *
; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
define void @atomic_add_local_const_offset(i32 addrspace(3)* %local) {
define amdgpu_kernel void @atomic_add_local_const_offset(i32 addrspace(3)* %local) {
%gep = getelementptr i32, i32 addrspace(3)* %local, i32 4
%val = atomicrmw volatile add i32 addrspace(3)* %gep, i32 5 seq_cst
ret void
@ -22,7 +22,7 @@ define void @atomic_add_local_const_offset(i32 addrspace(3)* %local) {
; FUNC-LABEL: {{^}}atomic_add_ret_local:
; R600: LDS_ADD_RET *
; SI: ds_add_rtn_u32
define void @atomic_add_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
define amdgpu_kernel void @atomic_add_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
%val = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst
store i32 %val, i32 addrspace(1)* %out
ret void
@ -31,7 +31,7 @@ define void @atomic_add_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %loc
; FUNC-LABEL: {{^}}atomic_add_ret_local_const_offset:
; R600: LDS_ADD_RET *
; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20
define void @atomic_add_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
define amdgpu_kernel void @atomic_add_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
%gep = getelementptr i32, i32 addrspace(3)* %local, i32 5
%val = atomicrmw volatile add i32 addrspace(3)* %gep, i32 5 seq_cst
store i32 %val, i32 addrspace(1)* %out
@ -5,7 +5,7 @@
; FUNC-LABEL: {{^}}atomic_sub_local:
; R600: LDS_SUB *
; SI: ds_sub_u32
define void @atomic_sub_local(i32 addrspace(3)* %local) {
define amdgpu_kernel void @atomic_sub_local(i32 addrspace(3)* %local) {
%unused = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst
ret void
}
@ -13,7 +13,7 @@ define void @atomic_sub_local(i32 addrspace(3)* %local) {
; FUNC-LABEL: {{^}}atomic_sub_local_const_offset:
; R600: LDS_SUB *
; SI: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
define void @atomic_sub_local_const_offset(i32 addrspace(3)* %local) {
define amdgpu_kernel void @atomic_sub_local_const_offset(i32 addrspace(3)* %local) {
%gep = getelementptr i32, i32 addrspace(3)* %local, i32 4
%val = atomicrmw volatile sub i32 addrspace(3)* %gep, i32 5 seq_cst
ret void
@ -22,7 +22,7 @@ define void @atomic_sub_local_const_offset(i32 addrspace(3)* %local) {
; FUNC-LABEL: {{^}}atomic_sub_ret_local:
; R600: LDS_SUB_RET *
; SI: ds_sub_rtn_u32
define void @atomic_sub_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
define amdgpu_kernel void @atomic_sub_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
%val = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst
store i32 %val, i32 addrspace(1)* %out
ret void
@ -31,7 +31,7 @@ define void @atomic_sub_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %loc
; FUNC-LABEL: {{^}}atomic_sub_ret_local_const_offset:
; R600: LDS_SUB_RET *
; SI: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20
define void @atomic_sub_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
define amdgpu_kernel void @atomic_sub_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
%gep = getelementptr i32, i32 addrspace(3)* %local, i32 5
%val = atomicrmw volatile sub i32 addrspace(3)* %gep, i32 5 seq_cst
store i32 %val, i32 addrspace(1)* %out
@ -5,7 +5,7 @@
; CHECK: VGPRBlocks: 0
; CHECK: NumSGPRsForWavesPerEU: 1
; CHECK: NumVGPRsForWavesPerEU: 1
define void @min_64_max_64() #0 {
define amdgpu_kernel void @min_64_max_64() #0 {
entry:
ret void
}
@ -16,7 +16,7 @@ attributes #0 = {"amdgpu-flat-work-group-size"="64,64"}
; CHECK: VGPRBlocks: 0
; CHECK: NumSGPRsForWavesPerEU: 1
; CHECK: NumVGPRsForWavesPerEU: 1
define void @min_64_max_128() #1 {
define amdgpu_kernel void @min_64_max_128() #1 {
entry:
ret void
}
@ -27,7 +27,7 @@ attributes #1 = {"amdgpu-flat-work-group-size"="64,128"}
; CHECK: VGPRBlocks: 0
; CHECK: NumSGPRsForWavesPerEU: 1
; CHECK: NumVGPRsForWavesPerEU: 1
define void @min_128_max_128() #2 {
define amdgpu_kernel void @min_128_max_128() #2 {
entry:
ret void
}
@ -39,7 +39,7 @@ attributes #2 = {"amdgpu-flat-work-group-size"="128,128"}
; CHECK: NumSGPRsForWavesPerEU: 13
; CHECK: NumVGPRsForWavesPerEU: 32
@var = addrspace(1) global float 0.0
define void @min_1024_max_2048() #3 {
define amdgpu_kernel void @min_1024_max_2048() #3 {
%val0 = load volatile float, float addrspace(1)* @var
%val1 = load volatile float, float addrspace(1)* @var
%val2 = load volatile float, float addrspace(1)* @var
@ -8,7 +8,7 @@
; ALL: SGPRBlocks: 1
; ALL: NumSGPRsForWavesPerEU: 9
define void @max_9_sgprs(i32 addrspace(1)* %out1,
define amdgpu_kernel void @max_9_sgprs(i32 addrspace(1)* %out1,
i32 addrspace(1)* %out2,
i32 addrspace(1)* %out3,
@ -49,7 +49,7 @@ define void @max_9_sgprs(i32 addrspace(1)* %out1,
; TOSMEM: SGPRBlocks: 1
; TOSMEM: NumSGPRsForWavesPerEU: 16
define void @max_12_sgprs_14_input_sgprs(i32 addrspace(1)* %out1,
define amdgpu_kernel void @max_12_sgprs_14_input_sgprs(i32 addrspace(1)* %out1,
i32 addrspace(1)* %out2,
i32 addrspace(1)* %out3,
i32 addrspace(1)* %out4,
@ -90,7 +90,7 @@ stores:
; XALL: SGPRBlocks: 2
; XALL: NumSGPRsForWavesPerEU: 18
;define void @max_12_sgprs_12_input_sgprs(i32 addrspace(1)* %out1,
;define amdgpu_kernel void @max_12_sgprs_12_input_sgprs(i32 addrspace(1)* %out1,
; i32 addrspace(1)* %out2,
; i32 addrspace(1)* %out3,
; i32 addrspace(1)* %out4,
@ -5,7 +5,7 @@
; CHECK-LABEL: {{^}}max_20_vgprs:
; CHECK: VGPRBlocks: 4
; CHECK: NumVGPRsForWavesPerEU: 20
define void @max_20_vgprs() #1 {
define amdgpu_kernel void @max_20_vgprs() #1 {
%val0 = load volatile float, float addrspace(1)* @var
%val1 = load volatile float, float addrspace(1)* @var
%val2 = load volatile float, float addrspace(1)* @var
@ -6,7 +6,7 @@
; CHECK: VGPRBlocks: 32
; CHECK: NumSGPRsForWavesPerEU: 102
; CHECK: NumVGPRsForWavesPerEU: 129
define void @empty_exactly_1() #0 {
define amdgpu_kernel void @empty_exactly_1() #0 {
entry:
ret void
}
@ -18,7 +18,7 @@ attributes #0 = {"amdgpu-waves-per-eu"="1,1"}
; CHECK: VGPRBlocks: 10
; CHECK: NumSGPRsForWavesPerEU: 102
; CHECK: NumVGPRsForWavesPerEU: 41
define void @empty_exactly_5() #1 {
define amdgpu_kernel void @empty_exactly_5() #1 {
entry:
ret void
}
@ -30,7 +30,7 @@ attributes #1 = {"amdgpu-waves-per-eu"="5,5"}
; CHECK: VGPRBlocks: 0
; CHECK: NumSGPRsForWavesPerEU: 1
; CHECK: NumVGPRsForWavesPerEU: 1
define void @empty_exactly_10() #2 {
define amdgpu_kernel void @empty_exactly_10() #2 {
entry:
ret void
}
@ -42,7 +42,7 @@ attributes #2 = {"amdgpu-waves-per-eu"="10,10"}
; CHECK: VGPRBlocks: 0
; CHECK: NumSGPRsForWavesPerEU: 1
; CHECK: NumVGPRsForWavesPerEU: 1
define void @empty_at_least_1() #3 {
define amdgpu_kernel void @empty_at_least_1() #3 {
entry:
ret void
}
@ -54,7 +54,7 @@ attributes #3 = {"amdgpu-waves-per-eu"="1"}
; CHECK: VGPRBlocks: 0
; CHECK: NumSGPRsForWavesPerEU: 1
; CHECK: NumVGPRsForWavesPerEU: 1
define void @empty_at_least_5() #4 {
define amdgpu_kernel void @empty_at_least_5() #4 {
entry:
ret void
}
@ -66,7 +66,7 @@ attributes #4 = {"amdgpu-waves-per-eu"="5"}
; CHECK: VGPRBlocks: 0
; CHECK: NumSGPRsForWavesPerEU: 1
; CHECK: NumVGPRsForWavesPerEU: 1
define void @empty_at_least_10() #5 {
define amdgpu_kernel void @empty_at_least_10() #5 {
entry:
ret void
}
@ -80,7 +80,7 @@ attributes #5 = {"amdgpu-waves-per-eu"="10"}
; CHECK: VGPRBlocks: 10
; CHECK: NumSGPRsForWavesPerEU: 102
; CHECK: NumVGPRsForWavesPerEU: 41
define void @empty_at_most_5() #6 {
define amdgpu_kernel void @empty_at_most_5() #6 {
entry:
ret void
}
@ -92,7 +92,7 @@ attributes #6 = {"amdgpu-waves-per-eu"="1,5"}
; CHECK: VGPRBlocks: 0
; CHECK: NumSGPRsForWavesPerEU: 1
; CHECK: NumVGPRsForWavesPerEU: 1
define void @empty_at_most_10() #7 {
define amdgpu_kernel void @empty_at_most_10() #7 {
entry:
ret void
}
@ -106,7 +106,7 @@ attributes #7 = {"amdgpu-waves-per-eu"="1,10"}
; CHECK: VGPRBlocks: 0
; CHECK: NumSGPRsForWavesPerEU: 1
; CHECK: NumVGPRsForWavesPerEU: 1
define void @empty_between_5_and_10() #8 {
define amdgpu_kernel void @empty_between_5_and_10() #8 {
entry:
ret void
}
@ -120,7 +120,7 @@ attributes #8 = {"amdgpu-waves-per-eu"="5,10"}
; CHECK: VGPRBlocks: 5
; CHECK: NumSGPRsForWavesPerEU: 13
; CHECK: NumVGPRsForWavesPerEU: 24
define void @exactly_10() #9 {
define amdgpu_kernel void @exactly_10() #9 {
%val0 = load volatile float, float addrspace(1)* @var
%val1 = load volatile float, float addrspace(1)* @var
%val2 = load volatile float, float addrspace(1)* @var
@ -1,56 +1,56 @@
; RUN: not llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s 2>&1 | FileCheck %s
; CHECK: can't parse integer attribute amdgpu-num-sgpr
define void @unparseable_single_0() #0 {
define amdgpu_kernel void @unparseable_single_0() #0 {
entry:
ret void
}
attributes #0 = {"amdgpu-num-sgpr"}
; CHECK: can't parse integer attribute amdgpu-num-sgpr
define void @unparseable_single_1() #1 {
define amdgpu_kernel void @unparseable_single_1() #1 {
entry:
ret void
}
attributes #1 = {"amdgpu-num-sgpr"="k"}
; CHECK: can't parse integer attribute amdgpu-num-sgpr
define void @unparseable_single_2() #2 {
define amdgpu_kernel void @unparseable_single_2() #2 {
entry:
ret void
}
attributes #2 = {"amdgpu-num-sgpr"="1,2"}
; CHECK: can't parse first integer attribute amdgpu-flat-work-group-size
define void @unparseable_pair_0() #3 {
define amdgpu_kernel void @unparseable_pair_0() #3 {
entry:
ret void
}
attributes #3 = {"amdgpu-flat-work-group-size"}
; CHECK: can't parse first integer attribute amdgpu-flat-work-group-size
define void @unparseable_pair_1() #4 {
define amdgpu_kernel void @unparseable_pair_1() #4 {
entry:
ret void
}
attributes #4 = {"amdgpu-flat-work-group-size"="k"}
; CHECK: can't parse second integer attribute amdgpu-flat-work-group-size
define void @unparseable_pair_2() #5 {
define amdgpu_kernel void @unparseable_pair_2() #5 {
entry:
ret void
}
attributes #5 = {"amdgpu-flat-work-group-size"="1"}
; CHECK: can't parse second integer attribute amdgpu-flat-work-group-size
define void @unparseable_pair_3() #6 {
define amdgpu_kernel void @unparseable_pair_3() #6 {
entry:
ret void
}
attributes #6 = {"amdgpu-flat-work-group-size"="1,k"}
; CHECK: can't parse second integer attribute amdgpu-flat-work-group-size
define void @unparseable_pair_4() #7 {
define amdgpu_kernel void @unparseable_pair_4() #7 {
entry:
ret void
}
@ -15,7 +15,7 @@
; GCN: {{^}}[[END]]:
; GCN: s_endpgm
define void @test_branch(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val) #0 {
define amdgpu_kernel void @test_branch(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val) #0 {
%cmp = icmp ne i32 %val, 0
br i1 %cmp, label %store, label %end
@ -39,7 +39,7 @@ end:
; GCN: {{^}}[[END]]:
; GCN: s_endpgm
define void @test_brcc_i1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i1 %val) #0 {
define amdgpu_kernel void @test_brcc_i1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i1 %val) #0 {
%cmp0 = icmp ne i1 %val, 0
br i1 %cmp0, label %store, label %end
@ -2,7 +2,7 @@
; RUN: llc -O0 -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s
; CHECK-LABEL: {{^}}test_loop:
define void @test_loop(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val) nounwind {
define amdgpu_kernel void @test_loop(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val) nounwind {
entry:
br label %loop.body
@ -5,7 +5,7 @@
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_bfe_u32 v{{[0-9]+}}, [[SRC]], 0, [[WIDTH]]
define void @v_ubfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
define amdgpu_kernel void @v_ubfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
%in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
@ -32,7 +32,7 @@ define void @v_ubfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32
; GCN: [[BFE]]
; GCN: [[SHL]]
define void @v_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
%in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
@ -52,7 +52,7 @@ define void @v_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(
; GCN: s_load_dword [[WIDTH:s[0-9]+]]
; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]]
; GCN: v_bfe_u32 v{{[0-9]+}}, [[SRC]], 0, [[VWIDTH]]
define void @s_ubfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
define amdgpu_kernel void @s_ubfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
%sub = sub i32 32, %width
@ -68,7 +68,7 @@ define void @s_ubfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, [[WIDTH]]
; GCN-NEXT: s_lshl_b32 [[SHL:s[0-9]+]], [[SRC]], [[SUB]]
; GCN-NEXT: s_lshr_b32 s{{[0-9]+}}, [[SHL]], [[SUB]]
define void @s_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
define amdgpu_kernel void @s_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
%sub = sub i32 32, %width
@ -83,7 +83,7 @@ define void @s_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_bfe_i32 v{{[0-9]+}}, [[SRC]], 0, [[WIDTH]]
define void @v_sbfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
define amdgpu_kernel void @v_sbfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
%in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
@ -110,7 +110,7 @@ define void @v_sbfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32
; GCN: [[BFE]]
; GCN: [[SHL]]
define void @v_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
define amdgpu_kernel void @v_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
%in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
@ -130,7 +130,7 @@ define void @v_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(
; GCN: s_load_dword [[WIDTH:s[0-9]+]]
; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]]
; GCN: v_bfe_i32 v{{[0-9]+}}, [[SRC]], 0, [[VWIDTH]]
define void @s_sbfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
define amdgpu_kernel void @s_sbfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
%sub = sub i32 32, %width
@ -146,7 +146,7 @@ define void @s_sbfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, [[WIDTH]]
; GCN-NEXT: s_lshl_b32 [[SHL:s[0-9]+]], [[SRC]], [[SUB]]
; GCN-NEXT: s_ashr_i32 s{{[0-9]+}}, [[SHL]], [[SUB]]
define void @s_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
define amdgpu_kernel void @s_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
%sub = sub i32 32, %width
@ -2,7 +2,7 @@
; CHECK: {{^}}bfe_def:
; CHECK: BFE_UINT
define void @bfe_def(i32 addrspace(1)* %out, i32 %x) {
define amdgpu_kernel void @bfe_def(i32 addrspace(1)* %out, i32 %x) {
entry:
%0 = lshr i32 %x, 5
%1 = and i32 %0, 15 ; 0xf
@ -17,7 +17,7 @@ entry:
; CHECK: {{^}}bfe_shift:
; CHECK-NOT: BFE_UINT
define void @bfe_shift(i32 addrspace(1)* %out, i32 %x) {
define amdgpu_kernel void @bfe_shift(i32 addrspace(1)* %out, i32 %x) {
entry:
%0 = lshr i32 %x, 16
%1 = and i32 %0, 65535 ; 0xffff
@ -9,7 +9,7 @@
; R600: BFI_INT
; SI: @bfi_def
; SI: v_bfi_b32
define void @bfi_def(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
define amdgpu_kernel void @bfi_def(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
entry:
%0 = xor i32 %x, -1
%1 = and i32 %z, %0
@ -25,7 +25,7 @@ entry:
; R600: BFI_INT
; SI: @bfi_sha256_ch
; SI: v_bfi_b32
define void @bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
define amdgpu_kernel void @bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
entry:
%0 = xor i32 %y, %z
%1 = and i32 %x, %0
@ -42,7 +42,7 @@ entry:
; SI: v_xor_b32_e32 [[DST:v[0-9]+]], {{s[0-9]+, v[0-9]+}}
; SI: v_bfi_b32 {{v[0-9]+}}, [[DST]], {{s[0-9]+, v[0-9]+}}
define void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
define amdgpu_kernel void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
entry:
%0 = and i32 %x, %z
%1 = or i32 %x, %z
@ -4,7 +4,7 @@
; FUNC-LABEL: {{^}}bfm_pattern:
; SI: s_bfm_b32 {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
define void @bfm_pattern(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
define amdgpu_kernel void @bfm_pattern(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
%a = shl i32 1, %x
%b = sub i32 %a, 1
%c = shl i32 %b, %y
@ -14,7 +14,7 @@ define void @bfm_pattern(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
; FUNC-LABEL: {{^}}bfm_pattern_simple:
; SI: s_bfm_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0
define void @bfm_pattern_simple(i32 addrspace(1)* %out, i32 %x) #0 {
define amdgpu_kernel void @bfm_pattern_simple(i32 addrspace(1)* %out, i32 %x) #0 {
%a = shl i32 1, %x
%b = sub i32 %a, 1
store i32 %b, i32 addrspace(1)* %out
@ -11,7 +11,7 @@
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
define void @store_bitcast_constant_v8i32_to_v8f32(<8 x float> addrspace(1)* %out, <8 x i32> %vec) {
define amdgpu_kernel void @store_bitcast_constant_v8i32_to_v8f32(<8 x float> addrspace(1)* %out, <8 x i32> %vec) {
%vec0.bc = bitcast <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8> to <8 x float>
store volatile <8 x float> %vec0.bc, <8 x float> addrspace(1)* %out
@ -27,7 +27,7 @@ define void @store_bitcast_constant_v8i32_to_v8f32(<8 x float> addrspace(1)* %ou
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
define void @store_bitcast_constant_v4i64_to_v8f32(<8 x float> addrspace(1)* %out, <4 x i64> %vec) {
define amdgpu_kernel void @store_bitcast_constant_v4i64_to_v8f32(<8 x float> addrspace(1)* %out, <4 x i64> %vec) {
%vec0.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 8> to <8 x float>
store volatile <8 x float> %vec0.bc, <8 x float> addrspace(1)* %out
@ -43,7 +43,7 @@ define void @store_bitcast_constant_v4i64_to_v8f32(<8 x float> addrspace(1)* %ou
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
define void @store_bitcast_constant_v4i64_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i64> %vec) {
define amdgpu_kernel void @store_bitcast_constant_v4i64_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i64> %vec) {
%vec0.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 8> to <4 x double>
store volatile <4 x double> %vec0.bc, <4 x double> addrspace(1)* %out
@ -59,7 +59,7 @@ define void @store_bitcast_constant_v4i64_to_v4f64(<4 x double> addrspace(1)* %o
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
define void @store_bitcast_constant_v8i32_to_v16i16(<8 x float> addrspace(1)* %out, <16 x i16> %vec) {
define amdgpu_kernel void @store_bitcast_constant_v8i32_to_v16i16(<8 x float> addrspace(1)* %out, <16 x i16> %vec) {
%vec0.bc = bitcast <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 8> to <8 x float>
store volatile <8 x float> %vec0.bc, <8 x float> addrspace(1)* %out
@ -70,7 +70,7 @@ define void @store_bitcast_constant_v8i32_to_v16i16(<8 x float> addrspace(1)* %o
; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source:
; GCN-NOT: store_dword
define void @store_value_lowered_to_undef_bitcast_source(<2 x i32> addrspace(1)* %out, i64 %a, i64 %b, i32 %c) #0 {
define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source(<2 x i32> addrspace(1)* %out, i64 %a, i64 %b, i32 %c) #0 {
%undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 %c) #1
%bc = bitcast i64 %undef to <2 x i32>
store volatile <2 x i32> %bc, <2 x i32> addrspace(1)* %out
@ -79,7 +79,7 @@ define void @store_value_lowered_to_undef_bitcast_source(<2 x i32> addrspace(1)*
; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source_extractelt:
; GCN-NOT: store_dword
define void @store_value_lowered_to_undef_bitcast_source_extractelt(i32 addrspace(1)* %out, i64 %a, i64 %b, i32 %c) #0 {
define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source_extractelt(i32 addrspace(1)* %out, i64 %a, i64 %b, i32 %c) #0 {
%undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 %c) #1
%bc = bitcast i64 %undef to <2 x i32>
%elt1 = extractelement <2 x i32> %bc, i32 1
@ -7,7 +7,7 @@
; GCN-LABEL: {{^}}materialize_0_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[K]]
define void @materialize_0_i32(i32 addrspace(1)* %out) {
define amdgpu_kernel void @materialize_0_i32(i32 addrspace(1)* %out) {
store i32 0, i32 addrspace(1)* %out
ret void
}
@ -16,7 +16,7 @@ define void @materialize_0_i32(i32 addrspace(1)* %out) {
; GCN: v_mov_b32_e32 v[[LOK:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[HIK:[0-9]+]], v[[LOK]]{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
define void @materialize_0_i64(i64 addrspace(1)* %out) {
define amdgpu_kernel void @materialize_0_i64(i64 addrspace(1)* %out) {
store i64 0, i64 addrspace(1)* %out
ret void
}
@ -24,7 +24,7 @@ define void @materialize_0_i64(i64 addrspace(1)* %out) {
; GCN-LABEL: {{^}}materialize_neg1_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], -1{{$}}
; GCN: buffer_store_dword [[K]]
define void @materialize_neg1_i32(i32 addrspace(1)* %out) {
define amdgpu_kernel void @materialize_neg1_i32(i32 addrspace(1)* %out) {
store i32 -1, i32 addrspace(1)* %out
ret void
}
@ -33,7 +33,7 @@ define void @materialize_neg1_i32(i32 addrspace(1)* %out) {
; GCN: v_mov_b32_e32 v[[LOK:[0-9]+]], -1{{$}}
; GCN: v_mov_b32_e32 v[[HIK:[0-9]+]], v[[LOK]]{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
define void @materialize_neg1_i64(i64 addrspace(1)* %out) {
define amdgpu_kernel void @materialize_neg1_i64(i64 addrspace(1)* %out) {
store i64 -1, i64 addrspace(1)* %out
ret void
}
@ -41,7 +41,7 @@ define void @materialize_neg1_i64(i64 addrspace(1)* %out) {
; GCN-LABEL: {{^}}materialize_signbit_i32:
; GCN: v_bfrev_b32_e32 [[K:v[0-9]+]], 1{{$}}
; GCN: buffer_store_dword [[K]]
define void @materialize_signbit_i32(i32 addrspace(1)* %out) {
define amdgpu_kernel void @materialize_signbit_i32(i32 addrspace(1)* %out) {
store i32 -2147483648, i32 addrspace(1)* %out
ret void
}
@ -50,7 +50,7 @@ define void @materialize_signbit_i32(i32 addrspace(1)* %out) {
; GCN-DAG: v_mov_b32_e32 v[[LOK:[0-9]+]], 0{{$}}
; GCN-DAG: v_bfrev_b32_e32 v[[HIK:[0-9]+]], 1{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
define void @materialize_signbit_i64(i64 addrspace(1)* %out) {
define amdgpu_kernel void @materialize_signbit_i64(i64 addrspace(1)* %out) {
store i64 -9223372036854775808, i64 addrspace(1)* %out
ret void
}
@ -58,7 +58,7 @@ define void @materialize_signbit_i64(i64 addrspace(1)* %out) {
; GCN-LABEL: {{^}}materialize_rev_neg16_i32:
; GCN: v_bfrev_b32_e32 [[K:v[0-9]+]], -16{{$}}
; GCN: buffer_store_dword [[K]]
define void @materialize_rev_neg16_i32(i32 addrspace(1)* %out) {
define amdgpu_kernel void @materialize_rev_neg16_i32(i32 addrspace(1)* %out) {
store i32 268435455, i32 addrspace(1)* %out
ret void
}
@ -67,7 +67,7 @@ define void @materialize_rev_neg16_i32(i32 addrspace(1)* %out) {
; GCN-DAG: v_mov_b32_e32 v[[LOK:[0-9]+]], -1{{$}}
; GCN-DAG: v_bfrev_b32_e32 v[[HIK:[0-9]+]], -16{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
define void @materialize_rev_neg16_i64(i64 addrspace(1)* %out) {
define amdgpu_kernel void @materialize_rev_neg16_i64(i64 addrspace(1)* %out) {
store i64 1152921504606846975, i64 addrspace(1)* %out
ret void
}
@ -75,7 +75,7 @@ define void @materialize_rev_neg16_i64(i64 addrspace(1)* %out) {
; GCN-LABEL: {{^}}materialize_rev_neg17_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0xf7ffffff{{$}}
; GCN: buffer_store_dword [[K]]
define void @materialize_rev_neg17_i32(i32 addrspace(1)* %out) {
define amdgpu_kernel void @materialize_rev_neg17_i32(i32 addrspace(1)* %out) {
store i32 -134217729, i32 addrspace(1)* %out
ret void
}
@ -84,7 +84,7 @@ define void @materialize_rev_neg17_i32(i32 addrspace(1)* %out) {
; GCN-DAG: v_mov_b32_e32 v[[LOK:[0-9]+]], -1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIK:[0-9]+]], 0xf7ffffff{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
define void @materialize_rev_neg17_i64(i64 addrspace(1)* %out) {
define amdgpu_kernel void @materialize_rev_neg17_i64(i64 addrspace(1)* %out) {
store i64 -576460752303423489, i64 addrspace(1)* %out
ret void
}
@ -92,7 +92,7 @@ define void @materialize_rev_neg17_i64(i64 addrspace(1)* %out) {
; GCN-LABEL: {{^}}materialize_rev_64_i32:
; GCN: v_bfrev_b32_e32 [[K:v[0-9]+]], 64{{$}}
; GCN: buffer_store_dword [[K]]
define void @materialize_rev_64_i32(i32 addrspace(1)* %out) {
define amdgpu_kernel void @materialize_rev_64_i32(i32 addrspace(1)* %out) {
store i32 33554432, i32 addrspace(1)* %out
ret void
}
@ -101,7 +101,7 @@ define void @materialize_rev_64_i32(i32 addrspace(1)* %out) {
; GCN-DAG: v_mov_b32_e32 v[[LOK:[0-9]+]], 0{{$}}
; GCN-DAG: v_bfrev_b32_e32 v[[HIK:[0-9]+]], 64{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
define void @materialize_rev_64_i64(i64 addrspace(1)* %out) {
define amdgpu_kernel void @materialize_rev_64_i64(i64 addrspace(1)* %out) {
store i64 144115188075855872, i64 addrspace(1)* %out
ret void
}
@ -109,7 +109,7 @@ define void @materialize_rev_64_i64(i64 addrspace(1)* %out) {
; GCN-LABEL: {{^}}materialize_rev_65_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x82000000{{$}}
; GCN: buffer_store_dword [[K]]
define void @materialize_rev_65_i32(i32 addrspace(1)* %out) {
define amdgpu_kernel void @materialize_rev_65_i32(i32 addrspace(1)* %out) {
store i32 -2113929216, i32 addrspace(1)* %out
ret void
}
@ -118,7 +118,7 @@ define void @materialize_rev_65_i32(i32 addrspace(1)* %out) {
; GCN-DAG: v_mov_b32_e32 v[[LOK:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIK:[0-9]+]], 0x82000000{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
define void @materialize_rev_65_i64(i64 addrspace(1)* %out) {
define amdgpu_kernel void @materialize_rev_65_i64(i64 addrspace(1)* %out) {
store i64 -9079256848778919936, i64 addrspace(1)* %out
ret void
}
@ -126,7 +126,7 @@ define void @materialize_rev_65_i64(i64 addrspace(1)* %out) {
; GCN-LABEL: {{^}}materialize_rev_3_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], -2.0{{$}}
; GCN: buffer_store_dword [[K]]
define void @materialize_rev_3_i32(i32 addrspace(1)* %out) {
define amdgpu_kernel void @materialize_rev_3_i32(i32 addrspace(1)* %out) {
store i32 -1073741824, i32 addrspace(1)* %out
ret void
}
@ -135,7 +135,7 @@ define void @materialize_rev_3_i32(i32 addrspace(1)* %out) {
; GCN-DAG: v_mov_b32_e32 v[[LOK:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIK:[0-9]+]], -2.0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
define void @materialize_rev_3_i64(i64 addrspace(1)* %out) {
define amdgpu_kernel void @materialize_rev_3_i64(i64 addrspace(1)* %out) {
store i64 -4611686018427387904, i64 addrspace(1)* %out
ret void
}
@ -143,7 +143,7 @@ define void @materialize_rev_3_i64(i64 addrspace(1)* %out) {
; GCN-LABEL: {{^}}materialize_rev_1.0_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x1fc{{$}}
; GCN: buffer_store_dword [[K]]
define void @materialize_rev_1.0_i32(i32 addrspace(1)* %out) {
define amdgpu_kernel void @materialize_rev_1.0_i32(i32 addrspace(1)* %out) {
store i32 508, i32 addrspace(1)* %out
ret void
}
@ -152,70 +152,70 @@ define void @materialize_rev_1.0_i32(i32 addrspace(1)* %out) {
; GCN-DAG: v_mov_b32_e32 v[[LOK:[0-9]+]], 0x1fc{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIK:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
define void @materialize_rev_1.0_i64(i64 addrspace(1)* %out) {
define amdgpu_kernel void @materialize_rev_1.0_i64(i64 addrspace(1)* %out) {
store i64 508, i64 addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}s_materialize_0_i32:
; GCN: s_mov_b32 s{{[0-9]+}}, 0{{$}}
define void @s_materialize_0_i32() {
define amdgpu_kernel void @s_materialize_0_i32() {
call void asm sideeffect "; use $0", "s"(i32 0)
ret void
}
; GCN-LABEL: {{^}}s_materialize_1_i32:
; GCN: s_mov_b32 s{{[0-9]+}}, 1{{$}}
define void @s_materialize_1_i32() {
define amdgpu_kernel void @s_materialize_1_i32() {
call void asm sideeffect "; use $0", "s"(i32 1)
ret void
}
; GCN-LABEL: {{^}}s_materialize_neg1_i32:
; GCN: s_mov_b32 s{{[0-9]+}}, -1{{$}}
define void @s_materialize_neg1_i32() {
define amdgpu_kernel void @s_materialize_neg1_i32() {
call void asm sideeffect "; use $0", "s"(i32 -1)
ret void
}
; GCN-LABEL: {{^}}s_materialize_signbit_i32:
; GCN: s_brev_b32 s{{[0-9]+}}, 1{{$}}
define void @s_materialize_signbit_i32() {
define amdgpu_kernel void @s_materialize_signbit_i32() {
call void asm sideeffect "; use $0", "s"(i32 -2147483648)
ret void
}
; GCN-LABEL: {{^}}s_materialize_rev_64_i32:
; GCN: s_brev_b32 s{{[0-9]+}}, 64{{$}}
define void @s_materialize_rev_64_i32() {
define amdgpu_kernel void @s_materialize_rev_64_i32() {
call void asm sideeffect "; use $0", "s"(i32 33554432)
ret void
}
; GCN-LABEL: {{^}}s_materialize_rev_65_i32:
; GCN: s_mov_b32 s{{[0-9]+}}, 0x82000000{{$}}
define void @s_materialize_rev_65_i32() {
define amdgpu_kernel void @s_materialize_rev_65_i32() {
call void asm sideeffect "; use $0", "s"(i32 -2113929216)
ret void
}
; GCN-LABEL: {{^}}s_materialize_rev_neg16_i32:
; GCN: s_brev_b32 s{{[0-9]+}}, -16{{$}}
define void @s_materialize_rev_neg16_i32() {
define amdgpu_kernel void @s_materialize_rev_neg16_i32() {
call void asm sideeffect "; use $0", "s"(i32 268435455)
ret void
}
; GCN-LABEL: {{^}}s_materialize_rev_neg17_i32:
; GCN: s_mov_b32 s{{[0-9]+}}, 0xf7ffffff{{$}}
define void @s_materialize_rev_neg17_i32() {
define amdgpu_kernel void @s_materialize_rev_neg17_i32() {
call void asm sideeffect "; use $0", "s"(i32 -134217729)
ret void
}
; GCN-LABEL: {{^}}s_materialize_rev_1.0_i32:
; GCN: s_movk_i32 s{{[0-9]+}}, 0x1fc{{$}}
define void @s_materialize_rev_1.0_i32() {
define amdgpu_kernel void @s_materialize_rev_1.0_i32() {
call void asm sideeffect "; use $0", "s"(i32 508)
ret void
}
@ -14,7 +14,7 @@ declare <4 x i64> @llvm.bitreverse.v4i64(<4 x i64>) #1
; FUNC-LABEL: {{^}}s_brev_i16:
; SI: s_brev_b32
define void @s_brev_i16(i16 addrspace(1)* noalias %out, i16 %val) #0 {
define amdgpu_kernel void @s_brev_i16(i16 addrspace(1)* noalias %out, i16 %val) #0 {
%brev = call i16 @llvm.bitreverse.i16(i16 %val) #1
store i16 %brev, i16 addrspace(1)* %out
ret void
@ -22,7 +22,7 @@ define void @s_brev_i16(i16 addrspace(1)* noalias %out, i16 %val) #0 {
; FUNC-LABEL: {{^}}v_brev_i16:
; SI: v_bfrev_b32_e32
define void @v_brev_i16(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %valptr) #0 {
define amdgpu_kernel void @v_brev_i16(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %valptr) #0 {
%val = load i16, i16 addrspace(1)* %valptr
%brev = call i16 @llvm.bitreverse.i16(i16 %val) #1
store i16 %brev, i16 addrspace(1)* %out
@ -35,7 +35,7 @@ define void @v_brev_i16(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalia
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: buffer_store_dword [[VRESULT]],
; SI: s_endpgm
define void @s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val) #0 {
define amdgpu_kernel void @s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val) #0 {
%brev = call i32 @llvm.bitreverse.i32(i32 %val) #1
store i32 %brev, i32 addrspace(1)* %out
ret void
@ -46,7 +46,7 @@ define void @s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val) #0 {
; SI: v_bfrev_b32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
define void @v_brev_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) #0 {
define amdgpu_kernel void @v_brev_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) #0 {
%val = load i32, i32 addrspace(1)* %valptr
%brev = call i32 @llvm.bitreverse.i32(i32 %val) #1
store i32 %brev, i32 addrspace(1)* %out
@ -56,7 +56,7 @@ define void @v_brev_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalia
; FUNC-LABEL: {{^}}s_brev_v2i32:
; SI: s_brev_b32
; SI: s_brev_b32
define void @s_brev_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> %val) #0 {
define amdgpu_kernel void @s_brev_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> %val) #0 {
%brev = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %val) #1
store <2 x i32> %brev, <2 x i32> addrspace(1)* %out
ret void
@ -65,7 +65,7 @@ define void @s_brev_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> %val)
; FUNC-LABEL: {{^}}v_brev_v2i32:
; SI: v_bfrev_b32_e32
; SI: v_bfrev_b32_e32
define void @v_brev_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) #0 {
define amdgpu_kernel void @v_brev_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) #0 {
%val = load <2 x i32>, <2 x i32> addrspace(1)* %valptr
%brev = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %val) #1
store <2 x i32> %brev, <2 x i32> addrspace(1)* %out
@ -73,7 +73,7 @@ define void @v_brev_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrsp
}
; FUNC-LABEL: {{^}}s_brev_i64:
define void @s_brev_i64(i64 addrspace(1)* noalias %out, i64 %val) #0 {
define amdgpu_kernel void @s_brev_i64(i64 addrspace(1)* noalias %out, i64 %val) #0 {
%brev = call i64 @llvm.bitreverse.i64(i64 %val) #1
store i64 %brev, i64 addrspace(1)* %out
ret void
@ -81,7 +81,7 @@ define void @s_brev_i64(i64 addrspace(1)* noalias %out, i64 %val) #0 {
; FUNC-LABEL: {{^}}v_brev_i64:
; SI-NOT: v_or_b32_e64 v{{[0-9]+}}, 0, 0
define void @v_brev_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %valptr) #0 {
define amdgpu_kernel void @v_brev_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %valptr) #0 {
%val = load i64, i64 addrspace(1)* %valptr
%brev = call i64 @llvm.bitreverse.i64(i64 %val) #1
store i64 %brev, i64 addrspace(1)* %out
@ -89,14 +89,14 @@ define void @v_brev_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalia
}
; FUNC-LABEL: {{^}}s_brev_v2i64:
define void @s_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2 x i64> %val) #0 {
define amdgpu_kernel void @s_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2 x i64> %val) #0 {
%brev = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %val) #1
store <2 x i64> %brev, <2 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}v_brev_v2i64:
define void @v_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %valptr) #0 {
define amdgpu_kernel void @v_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %valptr) #0 {
%val = load <2 x i64>, <2 x i64> addrspace(1)* %valptr
%brev = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %val) #1
store <2 x i64> %brev, <2 x i64> addrspace(1)* %out
@ -20,7 +20,7 @@
; SI: v_cvt_f16_f32_e32 v[[B_F16:[0-9]+]], v[[A_F32]]
; GCN: buffer_store_short v[[B_F16]]
; GCN: s_endpgm
define void @br_cc_f16(
define amdgpu_kernel void @br_cc_f16(
half addrspace(1)* %r,
half addrspace(1)* %a,
half addrspace(1)* %b) {
@ -59,7 +59,7 @@ two:
; GCN: two{{$}}
; SI: v_cvt_f16_f32_e32 v[[B_F16:[0-9]+]], v[[B_F32]]
define void @br_cc_f16_imm_a(
define amdgpu_kernel void @br_cc_f16_imm_a(
half addrspace(1)* %r,
half addrspace(1)* %b) {
entry:
@ -92,7 +92,7 @@ two:
; VI: v_mov_b32_e32 v[[B_F16:[0-9]+]], 0x3800{{$}}
; GCN: buffer_store_short v[[B_F16]]
; GCN: s_endpgm
define void @br_cc_f16_imm_b(
define amdgpu_kernel void @br_cc_f16_imm_b(
half addrspace(1)* %r,
half addrspace(1)* %a) {
entry:
@ -5,7 +5,7 @@
; FAIL: LLVM ERROR: Error while trying to spill VCC from class SReg_64: Cannot scavenge register without an emergency spill slot!
define void @spill(i32 addrspace(1)* %arg, i32 %cnd) #0 {
define amdgpu_kernel void @spill(i32 addrspace(1)* %arg, i32 %cnd) #0 {
entry:
%sgpr0 = tail call i32 asm sideeffect "s_mov_b32 s0, 0", "={SGPR0}"() #0
%sgpr1 = tail call i32 asm sideeffect "s_mov_b32 s1, 0", "={SGPR1}"() #0
@ -26,7 +26,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
; GCN: v_mov_b32_e32 [[V_CND:v[0-9]+]], [[CND]]
; GCN: buffer_store_dword [[V_CND]]
; GCN: s_endpgm
define void @uniform_conditional_max_short_forward_branch(i32 addrspace(1)* %arg, i32 %cnd) #0 {
define amdgpu_kernel void @uniform_conditional_max_short_forward_branch(i32 addrspace(1)* %arg, i32 %cnd) #0 {
bb:
%cmp = icmp eq i32 %cnd, 0
br i1 %cmp, label %bb3, label %bb2 ; +8 dword branch
@ -68,7 +68,7 @@ bb3:
; GCN: v_mov_b32_e32 [[V_CND:v[0-9]+]], [[CND]]
; GCN: buffer_store_dword [[V_CND]]
; GCN: s_endpgm
define void @uniform_conditional_min_long_forward_branch(i32 addrspace(1)* %arg, i32 %cnd) #0 {
define amdgpu_kernel void @uniform_conditional_min_long_forward_branch(i32 addrspace(1)* %arg, i32 %cnd) #0 {
bb0:
%cmp = icmp eq i32 %cnd, 0
br i1 %cmp, label %bb3, label %bb2 ; +9 dword branch
@ -108,7 +108,7 @@ bb3:
; GCN: [[ENDBB]]:
; GCN: buffer_store_dword [[V_CND]]
; GCN: s_endpgm
define void @uniform_conditional_min_long_forward_vcnd_branch(float addrspace(1)* %arg, float %cnd) #0 {
define amdgpu_kernel void @uniform_conditional_min_long_forward_vcnd_branch(float addrspace(1)* %arg, float %cnd) #0 {
bb0:
%cmp = fcmp oeq float %cnd, 0.0
br i1 %cmp, label %bb3, label %bb2 ; + 8 dword branch
@ -141,7 +141,7 @@ bb3:
; GCN: s_or_b64 exec, exec, [[SAVE]]
; GCN: buffer_store_dword
; GCN: s_endpgm
define void @min_long_forward_vbranch(i32 addrspace(1)* %arg) #0 {
define amdgpu_kernel void @min_long_forward_vbranch(i32 addrspace(1)* %arg) #0 {
bb:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = zext i32 %tid to i64
@ -188,7 +188,7 @@ bb3:
; GCN-NEXT: [[ENDBB]]:
; GCN-NEXT: s_endpgm
define void @long_backward_sbranch(i32 addrspace(1)* %arg) #0 {
define amdgpu_kernel void @long_backward_sbranch(i32 addrspace(1)* %arg) #0 {
bb:
br label %bb2
@ -243,7 +243,7 @@ bb3:
; GCN: buffer_store_dword [[BB4_K]]
; GCN-NEXT: s_endpgm
; GCN-NEXT: .Lfunc_end{{[0-9]+}}:
define void @uniform_unconditional_min_long_forward_branch(i32 addrspace(1)* %arg, i32 %arg1) {
define amdgpu_kernel void @uniform_unconditional_min_long_forward_branch(i32 addrspace(1)* %arg, i32 %arg1) {
bb0:
%tmp = icmp ne i32 %arg1, 0
br i1 %tmp, label %bb2, label %bb3
@ -285,7 +285,7 @@ bb4:
; GCN-NEXT: s_subb_u32 vcc_hi, vcc_hi, 0{{$}}
; GCN-NEXT: s_setpc_b64 vcc
; GCN-NEXT: .Lfunc_end{{[0-9]+}}:
define void @uniform_unconditional_min_long_backward_branch(i32 addrspace(1)* %arg, i32 %arg1) {
define amdgpu_kernel void @uniform_unconditional_min_long_backward_branch(i32 addrspace(1)* %arg, i32 %arg1) {
entry:
br label %loop
@ -342,7 +342,7 @@ loop:
; GCN-NEXT: v_nop_e64
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_endpgm
define void @expand_requires_expand(i32 %cond0) #0 {
define amdgpu_kernel void @expand_requires_expand(i32 %cond0) #0 {
bb0:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
%cmp0 = icmp slt i32 %cond0, 0
@ -399,7 +399,7 @@ bb3:
; GCN-NEXT: s_or_b64 exec, exec, [[MASK]]
; GCN-NEXT: s_sleep 5
; GCN-NEXT: s_endpgm
define void @uniform_inside_divergent(i32 addrspace(1)* %out, i32 %cond) #0 {
define amdgpu_kernel void @uniform_inside_divergent(i32 addrspace(1)* %out, i32 %cond) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%d_cmp = icmp ult i32 %tid, 16
@ -462,7 +462,7 @@ endif:
; GCN-NEXT: s_or_b64 exec, exec, [[MASK]]
; GCN: buffer_store_dword
; GCN-NEXT: s_endpgm
define void @analyze_mask_branch() #0 {
define amdgpu_kernel void @analyze_mask_branch() #0 {
entry:
%reg = call float asm sideeffect "v_mov_b32_e64 $0, 0", "=v"()
%cmp0 = fcmp ogt float %reg, 0.000000e+00
@ -17,7 +17,7 @@ declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>) nounwind readnone
; SI: v_bfi_b32 [[RESULT:v[0-9]+]], [[K]], [[TMP1]], [[TMP0]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @test_bswap_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
define amdgpu_kernel void @test_bswap_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%val = load i32, i32 addrspace(1)* %in, align 4
%bswap = call i32 @llvm.bswap.i32(i32 %val) nounwind readnone
store i32 %bswap, i32 addrspace(1)* %out, align 4
@ -32,7 +32,7 @@ define void @test_bswap_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI: s_endpgm
define void @test_bswap_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) nounwind {
define amdgpu_kernel void @test_bswap_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) nounwind {
%val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8
%bswap = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %val) nounwind readnone
store <2 x i32> %bswap, <2 x i32> addrspace(1)* %out, align 8
@ -53,7 +53,7 @@ define void @test_bswap_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI: s_endpgm
define void @test_bswap_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) nounwind {
define amdgpu_kernel void @test_bswap_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) nounwind {
%val = load <4 x i32>, <4 x i32> addrspace(1)* %in, align 16
%bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val) nounwind readnone
store <4 x i32> %bswap, <4 x i32> addrspace(1)* %out, align 16
@ -86,7 +86,7 @@ define void @test_bswap_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI: s_endpgm
define void @test_bswap_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) nounwind {
define amdgpu_kernel void @test_bswap_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) nounwind {
%val = load <8 x i32>, <8 x i32> addrspace(1)* %in, align 32
%bswap = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %val) nounwind readnone
store <8 x i32> %bswap, <8 x i32> addrspace(1)* %out, align 32
@ -95,21 +95,21 @@ define void @test_bswap_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(
; FUNC-LABEL: {{^}}test_bswap_i64:
; SI-NOT: v_or_b32_e64 v{{[0-9]+}}, 0, 0
define void @test_bswap_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
define amdgpu_kernel void @test_bswap_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
%val = load i64, i64 addrspace(1)* %in, align 8
%bswap = call i64 @llvm.bswap.i64(i64 %val) nounwind readnone
store i64 %bswap, i64 addrspace(1)* %out, align 8
ret void
}
define void @test_bswap_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) nounwind {
define amdgpu_kernel void @test_bswap_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) nounwind {
%val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16
%bswap = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %val) nounwind readnone
store <2 x i64> %bswap, <2 x i64> addrspace(1)* %out, align 16
ret void
}
define void @test_bswap_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) nounwind {
define amdgpu_kernel void @test_bswap_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) nounwind {
%val = load <4 x i64>, <4 x i64> addrspace(1)* %in, align 32
%bswap = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %val) nounwind readnone
store <4 x i64> %bswap, <4 x i64> addrspace(1)* %out, align 32
@ -10,7 +10,7 @@
; SI-DAG: v_mov_b32_e32 v[[X:[0-9]]], 5
; SI-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6
; SI: buffer_store_dwordx2 v{{\[}}[[X]]:[[Y]]{{\]}}
define void @build_vector2 (<2 x i32> addrspace(1)* %out) {
define amdgpu_kernel void @build_vector2 (<2 x i32> addrspace(1)* %out) {
entry:
store <2 x i32> <i32 5, i32 6>, <2 x i32> addrspace(1)* %out
ret void
@ -28,7 +28,7 @@ entry:
; SI-DAG: v_mov_b32_e32 v[[Z:[0-9]]], 7
; SI-DAG: v_mov_b32_e32 v[[W:[0-9]]], 8
; SI: buffer_store_dwordx4 v{{\[}}[[X]]:[[W]]{{\]}}
define void @build_vector4 (<4 x i32> addrspace(1)* %out) {
define amdgpu_kernel void @build_vector4 (<4 x i32> addrspace(1)* %out) {
entry:
store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> addrspace(1)* %out
ret void
@ -10,7 +10,7 @@
declare i32 @external_function(i32) nounwind
define void @test_call_external(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
define amdgpu_kernel void @test_call_external(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
%a = load i32, i32 addrspace(1)* %in
%b = load i32, i32 addrspace(1)* %b_ptr
@ -25,7 +25,7 @@ define i32 @defined_function(i32 %x) nounwind noinline {
ret i32 %y
}
define void @test_call(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
define amdgpu_kernel void @test_call(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
%a = load i32, i32 addrspace(1)* %in
%b = load i32, i32 addrspace(1)* %b_ptr
@ -35,7 +35,7 @@ define void @test_call(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
ret void
}
define void @test_tail_call(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
define amdgpu_kernel void @test_tail_call(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
%a = load i32, i32 addrspace(1)* %in
%b = load i32, i32 addrspace(1)* %b_ptr
@ -3,7 +3,7 @@
; GCN-LABEL: {{^}}store_fi_lifetime:
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4{{$}}
; GCN: buffer_store_dword [[FI]]
define void @store_fi_lifetime(i32 addrspace(1)* %out, i32 %in) #0 {
define amdgpu_kernel void @store_fi_lifetime(i32 addrspace(1)* %out, i32 %in) #0 {
entry:
%b = alloca i8
call void @llvm.lifetime.start(i64 1, i8* %b)
@ -18,7 +18,7 @@ entry:
; GCN: v_mov_b32_e32 [[ZERO0:v[0-9]+]], 4{{$}}
; GCN: v_mov_b32_e32 [[VLDSPTR:v[0-9]+]], [[LDSPTR]]
; GCN: ds_write_b32 [[VLDSPTR]], [[ZERO0]]
define void @stored_fi_to_lds(float* addrspace(3)* %ptr) #0 {
define amdgpu_kernel void @stored_fi_to_lds(float* addrspace(3)* %ptr) #0 {
%tmp = alloca float
store float 4.0, float *%tmp
store float* %tmp, float* addrspace(3)* %ptr
@ -38,7 +38,7 @@ define void @stored_fi_to_lds(float* addrspace(3)* %ptr) #0 {
; GCN-DAG: v_mov_b32_e32 [[FI1:v[0-9]+]], 8{{$}}
; GCN: ds_write_b32 [[VLDSPTR]], [[FI1]]
define void @stored_fi_to_lds_2_small_objects(float* addrspace(3)* %ptr) #0 {
define amdgpu_kernel void @stored_fi_to_lds_2_small_objects(float* addrspace(3)* %ptr) #0 {
%tmp0 = alloca float
%tmp1 = alloca float
store float 4.0, float* %tmp0
@ -54,7 +54,7 @@ define void @stored_fi_to_lds_2_small_objects(float* addrspace(3)* %ptr) #0 {
; GCN: buffer_store_dword [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4{{$}}
; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 4{{$}}
; GCN: buffer_store_dword [[ZERO]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4{{$}}
define void @stored_fi_to_self() #0 {
define amdgpu_kernel void @stored_fi_to_self() #0 {
%tmp = alloca i32*
; Avoid optimizing everything out
@ -73,7 +73,7 @@ define void @stored_fi_to_self() #0 {
; GCN: v_mov_b32_e32 [[OFFSETK:v[0-9]+]], 0x804{{$}}
; GCN: buffer_store_dword [[OFFSETK]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:2052{{$}}
define void @stored_fi_to_self_offset() #0 {
define amdgpu_kernel void @stored_fi_to_self_offset() #0 {
%tmp0 = alloca [512 x i32]
%tmp1 = alloca i32*
@ -98,7 +98,7 @@ define void @stored_fi_to_self_offset() #0 {
; GCN: v_mov_b32_e32 [[FI2:v[0-9]+]], 12{{$}}
; GCN: buffer_store_dword [[FI2]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:8{{$}}
define void @stored_fi_to_fi() #0 {
define amdgpu_kernel void @stored_fi_to_fi() #0 {
%tmp0 = alloca i32*
%tmp1 = alloca i32*
%tmp2 = alloca i32*
@ -118,7 +118,7 @@ define void @stored_fi_to_fi() #0 {
; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4{{$}}
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4{{$}}
; GCN: buffer_store_dword [[FI]]
define void @stored_fi_to_global(float* addrspace(1)* %ptr) #0 {
define amdgpu_kernel void @stored_fi_to_global(float* addrspace(1)* %ptr) #0 {
%tmp = alloca float
store float 0.0, float *%tmp
store float* %tmp, float* addrspace(1)* %ptr
@ -136,7 +136,7 @@ define void @stored_fi_to_global(float* addrspace(1)* %ptr) #0 {
; GCN-DAG: v_mov_b32_e32 [[FI2:v[0-9]+]], 12{{$}}
; GCN: buffer_store_dword [[FI2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
define void @stored_fi_to_global_2_small_objects(float* addrspace(1)* %ptr) #0 {
define amdgpu_kernel void @stored_fi_to_global_2_small_objects(float* addrspace(1)* %ptr) #0 {
%tmp0 = alloca float
%tmp1 = alloca float
%tmp2 = alloca float
@ -163,7 +163,7 @@ define void @stored_fi_to_global_2_small_objects(float* addrspace(1)* %ptr) #0 {
; GCN: buffer_store_dword [[K]], [[BASE_1_OFF_1]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
; GCN: buffer_store_dword [[BASE_1_OFF_2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
define void @stored_fi_to_global_huge_frame_offset(i32* addrspace(1)* %ptr) #0 {
define amdgpu_kernel void @stored_fi_to_global_huge_frame_offset(i32* addrspace(1)* %ptr) #0 {
%tmp0 = alloca [4096 x i32]
%tmp1 = alloca [4096 x i32]
%gep0.tmp0 = getelementptr [4096 x i32], [4096 x i32]* %tmp0, i32 0, i32 0
@ -186,7 +186,7 @@ define void @stored_fi_to_global_huge_frame_offset(i32* addrspace(1)* %ptr) #0 {
; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC_HI]], g1@gotpcrel32@hi+4
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4{{$}}
; GCN: buffer_store_dword [[FI]]
define void @cannot_select_assertzext_valuetype(i32 addrspace(1)* %out, i32 %idx) #0 {
define amdgpu_kernel void @cannot_select_assertzext_valuetype(i32 addrspace(1)* %out, i32 %idx) #0 {
entry:
%b = alloca i32, align 4
%tmp1 = load volatile i32*, i32* addrspace(1)* @g1, align 4
@ -7,7 +7,7 @@
; GCN: ds_write_b32
; GCN: s_branch [[LABEL]]
; GCN: s_endpgm
define void @test_loop(float addrspace(3)* %ptr, i32 %n) nounwind {
define amdgpu_kernel void @test_loop(float addrspace(3)* %ptr, i32 %n) nounwind {
entry:
%cmp = icmp eq i32 %n, -1
br i1 %cmp, label %for.exit, label %for.body
@ -31,7 +31,7 @@ for.body:
; GCN: ds_read_b32
; GCN: ds_write_b32
; GCN: s_branch [[LABEL]]
define void @loop_const_true(float addrspace(3)* %ptr, i32 %n) nounwind {
define amdgpu_kernel void @loop_const_true(float addrspace(3)* %ptr, i32 %n) nounwind {
entry:
br label %for.body
@ -52,7 +52,7 @@ for.body:
; GCN-LABEL: {{^}}loop_const_false:
; GCN-NOT: s_branch
; GCN: s_endpgm
define void @loop_const_false(float addrspace(3)* %ptr, i32 %n) nounwind {
define amdgpu_kernel void @loop_const_false(float addrspace(3)* %ptr, i32 %n) nounwind {
entry:
br label %for.body
@ -74,7 +74,7 @@ for.body:
; GCN-LABEL: {{^}}loop_const_undef:
; GCN-NOT: s_branch
; GCN: s_endpgm
define void @loop_const_undef(float addrspace(3)* %ptr, i32 %n) nounwind {
define amdgpu_kernel void @loop_const_undef(float addrspace(3)* %ptr, i32 %n) nounwind {
entry:
br label %for.body
@ -104,7 +104,7 @@ for.body:
; GCN: s_cbranch_vccnz [[LOOPBB]]
; GCN-NEXT: ; BB#2
; GCN-NEXT: s_endpgm
define void @loop_arg_0(float addrspace(3)* %ptr, i32 %n, i1 %cond) nounwind {
define amdgpu_kernel void @loop_arg_0(float addrspace(3)* %ptr, i32 %n, i1 %cond) nounwind {
entry:
br label %for.body


@ -35,7 +35,7 @@
; BUG32-NOT: Applying bug work-around
; NOBUG-NOT: Applying bug work-around
; FUNC-LABEL: {{^}}nested3:
define void @nested3(i32 addrspace(1)* %out, i32 %cond) {
define amdgpu_kernel void @nested3(i32 addrspace(1)* %out, i32 %cond) {
entry:
%0 = icmp sgt i32 %cond, 0
br i1 %0, label %if.1, label %end
@ -68,7 +68,7 @@ end:
; BUG32-NOT: Applying bug work-around
; NOBUG-NOT: Applying bug work-around
; FUNC-LABEL: {{^}}nested4:
define void @nested4(i32 addrspace(1)* %out, i32 %cond) {
define amdgpu_kernel void @nested4(i32 addrspace(1)* %out, i32 %cond) {
entry:
%0 = icmp sgt i32 %cond, 0
br i1 %0, label %if.1, label %end
@ -109,7 +109,7 @@ end:
; BUG32-NOT: Applying bug work-around
; NOBUG-NOT: Applying bug work-around
; FUNC-LABEL: {{^}}nested7:
define void @nested7(i32 addrspace(1)* %out, i32 %cond) {
define amdgpu_kernel void @nested7(i32 addrspace(1)* %out, i32 %cond) {
entry:
%0 = icmp sgt i32 %cond, 0
br i1 %0, label %if.1, label %end
@ -174,7 +174,7 @@ end:
; BUG32: Applying bug work-around
; NOBUG-NOT: Applying bug work-around
; FUNC-LABEL: {{^}}nested8:
define void @nested8(i32 addrspace(1)* %out, i32 %cond) {
define amdgpu_kernel void @nested8(i32 addrspace(1)* %out, i32 %cond) {
entry:
%0 = icmp sgt i32 %cond, 0
br i1 %0, label %if.1, label %end


@ -4,6 +4,6 @@
; EG: CF_END ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x80]
; CM: CF_END ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x88]
define void @eop() {
define amdgpu_kernel void @eop() {
ret void
}


@ -11,7 +11,7 @@
; GCN-LABEL: {{^}}test_no_sink_flat_small_offset_i32:
; GCN: flat_load_dword
; GCN: {{^}}BB0_2:
define void @test_no_sink_flat_small_offset_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) {
define amdgpu_kernel void @test_no_sink_flat_small_offset_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) {
entry:
%out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999
%in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7
@ -43,7 +43,7 @@ done:
; GCN-LABEL: {{^}}test_sink_noop_addrspacecast_flat_to_global_i32:
; CI: buffer_load_dword {{v[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:28
define void @test_sink_noop_addrspacecast_flat_to_global_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) {
define amdgpu_kernel void @test_sink_noop_addrspacecast_flat_to_global_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) {
entry:
%out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999
%in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7
@ -76,7 +76,7 @@ done:
; GCN-LABEL: {{^}}test_sink_noop_addrspacecast_flat_to_constant_i32:
; CI: s_load_dword {{s[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
define void @test_sink_noop_addrspacecast_flat_to_constant_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) {
define amdgpu_kernel void @test_sink_noop_addrspacecast_flat_to_constant_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) {
entry:
%out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999
%in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7


@ -15,7 +15,7 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:
; GCN-LABEL: {{^}}test_sink_global_small_offset_i32:
; GCN: {{^}}BB0_2:
define void @test_sink_global_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
define amdgpu_kernel void @test_sink_global_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
%in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 7
@ -45,7 +45,7 @@ done:
; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
; GCN: {{^}}BB1_2:
; GCN: s_or_b64 exec
define void @test_sink_global_small_max_i32_ds_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
define amdgpu_kernel void @test_sink_global_small_max_i32_ds_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
%in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
@ -72,7 +72,7 @@ done:
; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}}
; GCN: {{^}}BB2_2:
; GCN: s_or_b64 exec
define void @test_sink_global_small_max_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
define amdgpu_kernel void @test_sink_global_small_max_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024
%in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4095
@ -99,7 +99,7 @@ done:
; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
; GCN: {{^}}BB3_2:
; GCN: s_or_b64 exec
define void @test_sink_global_small_max_plus_1_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
define amdgpu_kernel void @test_sink_global_small_max_plus_1_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
%in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4096
@ -131,7 +131,7 @@ done:
; GCN: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:4092{{$}}
; GCN: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:4092{{$}}
; GCN: {{^}}BB4_2:
define void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
define amdgpu_kernel void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
entry:
%alloca = alloca [512 x i32], align 4
%out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
@ -172,7 +172,7 @@ done:
; GCN: buffer_load_dword {{v[0-9]+}}, [[BASE_FI1]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
; GCN: {{^BB[0-9]+}}_2:
define void @test_sink_scratch_small_offset_i32_reserved(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
define amdgpu_kernel void @test_sink_scratch_small_offset_i32_reserved(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
entry:
%alloca = alloca [512 x i32], align 4
%out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
@ -209,7 +209,7 @@ done:
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
; GCN: {{^BB[0-9]+}}_2:
define void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
define amdgpu_kernel void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
entry:
%alloca = alloca [512 x i32], align 4
%out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
@ -241,7 +241,7 @@ done:
; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
; GCN: {{^BB[0-9]+}}_2:
define void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset) {
define amdgpu_kernel void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset) {
entry:
%offset.ext = zext i32 %offset to i64
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
@ -271,7 +271,7 @@ done:
; GCN: s_and_saveexec_b64
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x7{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
define amdgpu_kernel void @test_sink_constant_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
%in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 7
@ -300,7 +300,7 @@ done:
; GCN: s_and_saveexec_b64
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xff{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_8_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
define amdgpu_kernel void @test_sink_constant_max_8_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
%in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 255
@ -333,7 +333,7 @@ done:
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_8_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
define amdgpu_kernel void @test_sink_constant_max_8_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
%in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 256
@ -365,7 +365,7 @@ done:
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}}
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
define amdgpu_kernel void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
%in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 4294967295
@ -396,7 +396,7 @@ done:
; GCN: s_addc_u32
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_32_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
define amdgpu_kernel void @test_sink_constant_max_32_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
%in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 17179869181
@ -426,7 +426,7 @@ done:
; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xffffc{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_20_bit_byte_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
define amdgpu_kernel void @test_sink_constant_max_20_bit_byte_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
%in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262143
@ -464,7 +464,7 @@ done:
; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_20_bit_byte_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
define amdgpu_kernel void @test_sink_constant_max_20_bit_byte_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
%in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262144
@ -494,7 +494,7 @@ done:
; GCN: s_load_dword [[SREG1:s[0-9]+]],
; GCN: v_mov_b32_e32 [[VREG1:v[0-9]+]], [[SREG1]]
; GCN-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VREG1]] offset0:3 offset1:5
define void @sink_ds_address(%struct.foo addrspace(3)* nocapture %ptr) nounwind {
define amdgpu_kernel void @sink_ds_address(%struct.foo addrspace(3)* nocapture %ptr) nounwind {
entry:
%x = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0
%y = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 2
@ -521,7 +521,7 @@ bb34:
; OPT: if:
; OPT: %sunkaddr = ptrtoint i8 addrspace(2)* %in to i64
; OPT: %sunkaddr1 = add i64 %sunkaddr, 4095
define void @test_sink_constant_small_max_mubuf_offset_load_i32_align_1(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
define amdgpu_kernel void @test_sink_constant_small_max_mubuf_offset_load_i32_align_1(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024
%in.gep = getelementptr i8, i8 addrspace(2)* %in, i64 4095
@ -548,7 +548,7 @@ done:
; OPT: %sunkaddr1 = add i32 %sunkaddr, 28
; OPT: %sunkaddr2 = inttoptr i32 %sunkaddr1 to i32 addrspace(3)*
; OPT: %tmp1 = atomicrmw add i32 addrspace(3)* %sunkaddr2, i32 2 seq_cst
define void @test_sink_local_small_offset_atomicrmw_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) {
define amdgpu_kernel void @test_sink_local_small_offset_atomicrmw_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999
%in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7
@ -574,7 +574,7 @@ done:
; OPT: %sunkaddr1 = add i32 %sunkaddr, 28
; OPT: %sunkaddr2 = inttoptr i32 %sunkaddr1 to i32 addrspace(3)*
; OPT: %tmp1.struct = cmpxchg i32 addrspace(3)* %sunkaddr2, i32 undef, i32 2 seq_cst monotonic
define void @test_sink_local_small_offset_cmpxchg_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) {
define amdgpu_kernel void @test_sink_local_small_offset_cmpxchg_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999
%in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7
@ -600,7 +600,7 @@ done:
; OPT: %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7
; OPT: br i1
; OPT: cmpxchg i32 addrspace(3)* addrspace(3)* undef, i32 addrspace(3)* %in.gep, i32 addrspace(3)* undef seq_cst monotonic
define void @test_wrong_operand_local_small_offset_cmpxchg_i32(i32 addrspace(3)* addrspace(3)* %out, i32 addrspace(3)* %in) {
define amdgpu_kernel void @test_wrong_operand_local_small_offset_cmpxchg_i32(i32 addrspace(3)* addrspace(3)* %out, i32 addrspace(3)* %in) {
entry:
%out.gep = getelementptr i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* %out, i32 999999
%in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7
@ -627,7 +627,7 @@ done:
; OPT: %sunkaddr1 = add i32 %sunkaddr, 28
; OPT: %sunkaddr2 = inttoptr i32 %sunkaddr1 to i32 addrspace(3)*
; OPT: %tmp1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %sunkaddr2, i32 2)
define void @test_sink_local_small_offset_atomic_inc_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) {
define amdgpu_kernel void @test_sink_local_small_offset_atomic_inc_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999
%in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7
@ -653,7 +653,7 @@ done:
; OPT: %sunkaddr1 = add i32 %sunkaddr, 28
; OPT: %sunkaddr2 = inttoptr i32 %sunkaddr1 to i32 addrspace(3)*
; OPT: %tmp1 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %sunkaddr2, i32 2)
define void @test_sink_local_small_offset_atomic_dec_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) {
define amdgpu_kernel void @test_sink_local_small_offset_atomic_dec_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999
%in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7


@ -36,7 +36,7 @@
; GCN: BB0_3:
; GCN: buffer_store_dword
; GCN: s_endpgm
define void @sink_ubfe_i32(i32 addrspace(1)* %out, i32 %arg1) #0 {
define amdgpu_kernel void @sink_ubfe_i32(i32 addrspace(1)* %out, i32 %arg1) #0 {
entry:
%shr = lshr i32 %arg1, 8
br i1 undef, label %bb0, label %bb1
@ -76,7 +76,7 @@ ret:
; OPT: ret
; GCN-LABEL: {{^}}sink_sbfe_i32:
define void @sink_sbfe_i32(i32 addrspace(1)* %out, i32 %arg1) #0 {
define amdgpu_kernel void @sink_sbfe_i32(i32 addrspace(1)* %out, i32 %arg1) #0 {
entry:
%shr = ashr i32 %arg1, 8
br i1 undef, label %bb0, label %bb1
@ -134,7 +134,7 @@ ret:
; GCN: BB2_3:
; GCN: buffer_store_short
; GCN: s_endpgm
define void @sink_ubfe_i16(i16 addrspace(1)* %out, i16 %arg1) #0 {
define amdgpu_kernel void @sink_ubfe_i16(i16 addrspace(1)* %out, i16 %arg1) #0 {
entry:
%shr = lshr i16 %arg1, 4
br i1 undef, label %bb0, label %bb1
@ -187,7 +187,7 @@ ret:
; GCN: BB3_3:
; GCN: buffer_store_dwordx2
define void @sink_ubfe_i64_span_midpoint(i64 addrspace(1)* %out, i64 %arg1) #0 {
define amdgpu_kernel void @sink_ubfe_i64_span_midpoint(i64 addrspace(1)* %out, i64 %arg1) #0 {
entry:
%shr = lshr i64 %arg1, 30
br i1 undef, label %bb0, label %bb1
@ -236,7 +236,7 @@ ret:
; GCN: BB4_3:
; GCN: buffer_store_dwordx2
define void @sink_ubfe_i64_low32(i64 addrspace(1)* %out, i64 %arg1) #0 {
define amdgpu_kernel void @sink_ubfe_i64_low32(i64 addrspace(1)* %out, i64 %arg1) #0 {
entry:
%shr = lshr i64 %arg1, 15
br i1 undef, label %bb0, label %bb1
@ -283,7 +283,7 @@ ret:
; GCN: BB5_3:
; GCN: buffer_store_dwordx2
define void @sink_ubfe_i64_high32(i64 addrspace(1)* %out, i64 %arg1) #0 {
define amdgpu_kernel void @sink_ubfe_i64_high32(i64 addrspace(1)* %out, i64 %arg1) #0 {
entry:
%shr = lshr i64 %arg1, 35
br i1 undef, label %bb0, label %bb1


@ -8,7 +8,7 @@ declare i1 @llvm.amdgcn.class.f32(float, i32)
; GCN: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0{{$}}
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
; GCN: v_cndmask_b32_e64 v0, 0, 1, s{{\[[0-9]+:[0-9]+\]}}
define void @vcc_shrink_vcc_def(float %arg, i32 %arg1, float %arg2, i32 %arg3) {
define amdgpu_kernel void @vcc_shrink_vcc_def(float %arg, i32 %arg1, float %arg2, i32 %arg3) {
bb0:
%tmp = icmp sgt i32 %arg1, 4
%c = icmp eq i32 %arg3, 0
@ -35,7 +35,7 @@ bb2:
; GCN-NOT: vcc
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
; GCN: v_cndmask_b32_e64 v0, 0, 1, s{{\[[0-9]+:[0-9]+\]}}
define void @preserve_condition_undef_flag(float %arg, i32 %arg1, float %arg2) {
define amdgpu_kernel void @preserve_condition_undef_flag(float %arg, i32 %arg1, float %arg2) {
bb0:
%tmp = icmp sgt i32 %arg1, 4
%undef = call i1 @llvm.amdgcn.class.f32(float undef, i32 undef)


@ -13,7 +13,7 @@ declare float @llvm.fma.f32(float, float, float)
; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0
; It's probably OK if this is slightly higher:
; CHECK: ; NumVgprs: 8
define void @foobar(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in, i32 %flag) {
define amdgpu_kernel void @foobar(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in, i32 %flag) {
entry:
%cmpflag = icmp eq i32 %flag, 1
br i1 %cmpflag, label %loop, label %exit


@ -8,7 +8,7 @@
; SI-LLC-LABEL: {{^}}test:
; SI-LLC: s_mul_i32
; SI-LLC-NOT: mul
define void @test(i8 addrspace(1)* nocapture readonly %in, i32 %a, i8 %b) {
define amdgpu_kernel void @test(i8 addrspace(1)* nocapture readonly %in, i32 %a, i8 %b) {
entry:
%0 = mul nsw i32 %a, 3
%1 = sext i32 %0 to i64


@ -12,7 +12,7 @@
; EG-LABEL: {{^}}combine_vloads:
; EG: VTX_READ_128
; EG: VTX_READ_128
define void @combine_vloads(<8 x i8> addrspace(1)* nocapture %src, <8 x i8> addrspace(1)* nocapture %result) nounwind {
define amdgpu_kernel void @combine_vloads(<8 x i8> addrspace(1)* nocapture %src, <8 x i8> addrspace(1)* nocapture %result) nounwind {
entry:
br label %for.body


@ -8,7 +8,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0
; GCN-LABEL: {{^}}commute_eq_64_i32:
; GCN: v_cmp_eq_u32_e32 vcc, 64, v{{[0-9]+}}
define void @commute_eq_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_eq_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -21,7 +21,7 @@ define void @commute_eq_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1
; GCN-LABEL: {{^}}commute_ne_64_i32:
; GCN: v_cmp_ne_u32_e32 vcc, 64, v{{[0-9]+}}
define void @commute_ne_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_ne_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -36,7 +36,7 @@ define void @commute_ne_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1
; GCN-LABEL: {{^}}commute_ne_litk_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3039
; GCN: v_cmp_ne_u32_e32 vcc, [[K]], v{{[0-9]+}}
define void @commute_ne_litk_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_ne_litk_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -49,7 +49,7 @@ define void @commute_ne_litk_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
; GCN-LABEL: {{^}}commute_ugt_64_i32:
; GCN: v_cmp_lt_u32_e32 vcc, 64, v{{[0-9]+}}
define void @commute_ugt_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_ugt_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -62,7 +62,7 @@ define void @commute_ugt_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #
; GCN-LABEL: {{^}}commute_uge_64_i32:
; GCN: v_cmp_lt_u32_e32 vcc, 63, v{{[0-9]+}}
define void @commute_uge_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_uge_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -75,7 +75,7 @@ define void @commute_uge_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #
; GCN-LABEL: {{^}}commute_ult_64_i32:
; GCN: v_cmp_gt_u32_e32 vcc, 64, v{{[0-9]+}}
define void @commute_ult_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_ult_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -88,7 +88,7 @@ define void @commute_ult_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #
; GCN-LABEL: {{^}}commute_ule_63_i32:
; GCN: v_cmp_gt_u32_e32 vcc, 64, v{{[0-9]+}}
define void @commute_ule_63_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_ule_63_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -104,7 +104,7 @@ define void @commute_ule_63_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #
; GCN-LABEL: {{^}}commute_ule_64_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x41{{$}}
; GCN: v_cmp_gt_u32_e32 vcc, [[K]], v{{[0-9]+}}
define void @commute_ule_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_ule_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -117,7 +117,7 @@ define void @commute_ule_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #
; GCN-LABEL: {{^}}commute_sgt_neg1_i32:
; GCN: v_cmp_lt_i32_e32 vcc, -1, v{{[0-9]+}}
define void @commute_sgt_neg1_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_sgt_neg1_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -130,7 +130,7 @@ define void @commute_sgt_neg1_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
; GCN-LABEL: {{^}}commute_sge_neg2_i32:
; GCN: v_cmp_lt_i32_e32 vcc, -3, v{{[0-9]+}}
define void @commute_sge_neg2_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_sge_neg2_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -143,7 +143,7 @@ define void @commute_sge_neg2_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
; GCN-LABEL: {{^}}commute_slt_neg16_i32:
; GCN: v_cmp_gt_i32_e32 vcc, -16, v{{[0-9]+}}
define void @commute_slt_neg16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_slt_neg16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -156,7 +156,7 @@ define void @commute_slt_neg16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in
; GCN-LABEL: {{^}}commute_sle_5_i32:
; GCN: v_cmp_gt_i32_e32 vcc, 6, v{{[0-9]+}}
define void @commute_sle_5_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_sle_5_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -173,7 +173,7 @@ define void @commute_sle_5_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1
; GCN-LABEL: {{^}}commute_eq_64_i64:
; GCN: v_cmp_eq_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
define void @commute_eq_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_eq_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -186,7 +186,7 @@ define void @commute_eq_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1
; GCN-LABEL: {{^}}commute_ne_64_i64:
; GCN: v_cmp_ne_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
define void @commute_ne_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_ne_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -199,7 +199,7 @@ define void @commute_ne_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1
; GCN-LABEL: {{^}}commute_ugt_64_i64:
; GCN: v_cmp_lt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
define void @commute_ugt_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_ugt_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -212,7 +212,7 @@ define void @commute_ugt_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #
; GCN-LABEL: {{^}}commute_uge_64_i64:
; GCN: v_cmp_lt_u64_e32 vcc, 63, v{{\[[0-9]+:[0-9]+\]}}
define void @commute_uge_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_uge_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -225,7 +225,7 @@ define void @commute_uge_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #
; GCN-LABEL: {{^}}commute_ult_64_i64:
; GCN: v_cmp_gt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
define void @commute_ult_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_ult_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -238,7 +238,7 @@ define void @commute_ult_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #
; GCN-LABEL: {{^}}commute_ule_63_i64:
; GCN: v_cmp_gt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
define void @commute_ule_63_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_ule_63_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -254,7 +254,7 @@ define void @commute_ule_63_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #
; GCN-LABEL: {{^}}commute_ule_64_i64:
; GCN-DAG: s_movk_i32 s[[KLO:[0-9]+]], 0x41{{$}}
; GCN: v_cmp_gt_u64_e32 vcc, s{{\[}}[[KLO]]:{{[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}
define void @commute_ule_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_ule_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -267,7 +267,7 @@ define void @commute_ule_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #
; GCN-LABEL: {{^}}commute_sgt_neg1_i64:
; GCN: v_cmp_lt_i64_e32 vcc, -1, v{{\[[0-9]+:[0-9]+\]}}
define void @commute_sgt_neg1_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_sgt_neg1_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -280,7 +280,7 @@ define void @commute_sgt_neg1_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in)
; GCN-LABEL: {{^}}commute_sge_neg2_i64:
; GCN: v_cmp_lt_i64_e32 vcc, -3, v{{\[[0-9]+:[0-9]+\]}}
define void @commute_sge_neg2_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_sge_neg2_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -293,7 +293,7 @@ define void @commute_sge_neg2_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in)
; GCN-LABEL: {{^}}commute_slt_neg16_i64:
; GCN: v_cmp_gt_i64_e32 vcc, -16, v{{\[[0-9]+:[0-9]+\]}}
define void @commute_slt_neg16_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_slt_neg16_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -306,7 +306,7 @@ define void @commute_slt_neg16_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in
; GCN-LABEL: {{^}}commute_sle_5_i64:
; GCN: v_cmp_gt_i64_e32 vcc, 6, v{{\[[0-9]+:[0-9]+\]}}
define void @commute_sle_5_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_sle_5_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -324,7 +324,7 @@ define void @commute_sle_5_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1
; GCN-LABEL: {{^}}commute_oeq_2.0_f32:
; GCN: v_cmp_eq_f32_e32 vcc, 2.0, v{{[0-9]+}}
define void @commute_oeq_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_oeq_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -338,7 +338,7 @@ define void @commute_oeq_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in
; GCN-LABEL: {{^}}commute_ogt_2.0_f32:
; GCN: v_cmp_lt_f32_e32 vcc, 2.0, v{{[0-9]+}}
define void @commute_ogt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_ogt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -351,7 +351,7 @@ define void @commute_ogt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in
; GCN-LABEL: {{^}}commute_oge_2.0_f32:
; GCN: v_cmp_le_f32_e32 vcc, 2.0, v{{[0-9]+}}
define void @commute_oge_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_oge_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -364,7 +364,7 @@ define void @commute_oge_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in
; GCN-LABEL: {{^}}commute_olt_2.0_f32:
; GCN: v_cmp_gt_f32_e32 vcc, 2.0, v{{[0-9]+}}
define void @commute_olt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_olt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -377,7 +377,7 @@ define void @commute_olt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in
; GCN-LABEL: {{^}}commute_ole_2.0_f32:
; GCN: v_cmp_ge_f32_e32 vcc, 2.0, v{{[0-9]+}}
define void @commute_ole_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_ole_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -390,7 +390,7 @@ define void @commute_ole_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in
; GCN-LABEL: {{^}}commute_one_2.0_f32:
; GCN: v_cmp_lg_f32_e32 vcc, 2.0, v{{[0-9]+}}
define void @commute_one_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_one_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -403,7 +403,7 @@ define void @commute_one_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in
; GCN-LABEL: {{^}}commute_ord_2.0_f32:
; GCN: v_cmp_o_f32_e32 vcc, [[REG:v[0-9]+]], [[REG]]
define void @commute_ord_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_ord_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -416,7 +416,7 @@ define void @commute_ord_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in
; GCN-LABEL: {{^}}commute_ueq_2.0_f32:
; GCN: v_cmp_nlg_f32_e32 vcc, 2.0, v{{[0-9]+}}
define void @commute_ueq_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_ueq_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -429,7 +429,7 @@ define void @commute_ueq_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in
; GCN-LABEL: {{^}}commute_ugt_2.0_f32:
; GCN: v_cmp_nge_f32_e32 vcc, 2.0, v{{[0-9]+}}
define void @commute_ugt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_ugt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -442,7 +442,7 @@ define void @commute_ugt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in
; GCN-LABEL: {{^}}commute_uge_2.0_f32:
; GCN: v_cmp_ngt_f32_e32 vcc, 2.0, v{{[0-9]+}}
define void @commute_uge_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_uge_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -455,7 +455,7 @@ define void @commute_uge_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in
; GCN-LABEL: {{^}}commute_ult_2.0_f32:
; GCN: v_cmp_nle_f32_e32 vcc, 2.0, v{{[0-9]+}}
define void @commute_ult_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_ult_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -468,7 +468,7 @@ define void @commute_ult_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in
; GCN-LABEL: {{^}}commute_ule_2.0_f32:
; GCN: v_cmp_nlt_f32_e32 vcc, 2.0, v{{[0-9]+}}
define void @commute_ule_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_ule_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -481,7 +481,7 @@ define void @commute_ule_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in
; GCN-LABEL: {{^}}commute_une_2.0_f32:
; GCN: v_cmp_neq_f32_e32 vcc, 2.0, v{{[0-9]+}}
define void @commute_une_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_une_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -494,7 +494,7 @@ define void @commute_une_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in
; GCN-LABEL: {{^}}commute_uno_2.0_f32:
; GCN: v_cmp_u_f32_e32 vcc, [[REG:v[0-9]+]], [[REG]]
define void @commute_uno_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_uno_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -512,7 +512,7 @@ define void @commute_uno_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in
; GCN-LABEL: {{^}}commute_oeq_2.0_f64:
; GCN: v_cmp_eq_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define void @commute_oeq_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_oeq_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -526,7 +526,7 @@ define void @commute_oeq_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i
; GCN-LABEL: {{^}}commute_ogt_2.0_f64:
; GCN: v_cmp_lt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define void @commute_ogt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_ogt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -539,7 +539,7 @@ define void @commute_ogt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i
; GCN-LABEL: {{^}}commute_oge_2.0_f64:
; GCN: v_cmp_le_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define void @commute_oge_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_oge_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -552,7 +552,7 @@ define void @commute_oge_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i
; GCN-LABEL: {{^}}commute_olt_2.0_f64:
; GCN: v_cmp_gt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define void @commute_olt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_olt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -565,7 +565,7 @@ define void @commute_olt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i
; GCN-LABEL: {{^}}commute_ole_2.0_f64:
; GCN: v_cmp_ge_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define void @commute_ole_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_ole_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -578,7 +578,7 @@ define void @commute_ole_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i
; GCN-LABEL: {{^}}commute_one_2.0_f64:
; GCN: v_cmp_lg_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define void @commute_one_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_one_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -591,7 +591,7 @@ define void @commute_one_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i
; GCN-LABEL: {{^}}commute_ord_2.0_f64:
; GCN: v_cmp_o_f64_e32 vcc, [[REG:v\[[0-9]+:[0-9]+\]]], [[REG]]
define void @commute_ord_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_ord_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -604,7 +604,7 @@ define void @commute_ord_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i
; GCN-LABEL: {{^}}commute_ueq_2.0_f64:
; GCN: v_cmp_nlg_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define void @commute_ueq_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_ueq_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -617,7 +617,7 @@ define void @commute_ueq_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i
; GCN-LABEL: {{^}}commute_ugt_2.0_f64:
; GCN: v_cmp_nge_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define void @commute_ugt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_ugt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -630,7 +630,7 @@ define void @commute_ugt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i
; GCN-LABEL: {{^}}commute_uge_2.0_f64:
; GCN: v_cmp_ngt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define void @commute_uge_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_uge_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -643,7 +643,7 @@ define void @commute_uge_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i
; GCN-LABEL: {{^}}commute_ult_2.0_f64:
; GCN: v_cmp_nle_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define void @commute_ult_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_ult_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -656,7 +656,7 @@ define void @commute_ult_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i
; GCN-LABEL: {{^}}commute_ule_2.0_f64:
; GCN: v_cmp_nlt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define void @commute_ule_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_ule_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -669,7 +669,7 @@ define void @commute_ule_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i
; GCN-LABEL: {{^}}commute_une_2.0_f64:
; GCN: v_cmp_neq_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define void @commute_une_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_une_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -682,7 +682,7 @@ define void @commute_une_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i
; GCN-LABEL: {{^}}commute_uno_2.0_f64:
; GCN: v_cmp_u_f64_e32 vcc, [[REG:v\[[0-9]+:[0-9]+\]]], [[REG]]
define void @commute_uno_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
define amdgpu_kernel void @commute_uno_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -703,7 +703,7 @@ define void @commute_uno_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4{{$}}
; GCN: v_cmp_eq_u32_e32 vcc, [[FI]], v{{[0-9]+}}
define void @commute_frameindex(i32 addrspace(1)* nocapture %out) #0 {
define amdgpu_kernel void @commute_frameindex(i32 addrspace(1)* nocapture %out) #0 {
entry:
%stack0 = alloca i32
%ptr0 = load volatile i32*, i32* addrspace(1)* undef


@ -8,7 +8,7 @@ declare float @llvm.fma.f32(float, float, float) nounwind readnone
; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: v_add_f32_e64 [[REG:v[0-9]+]], |[[X]]|, 2.0
; SI: buffer_store_dword [[REG]]
define void @commute_add_imm_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
define amdgpu_kernel void @commute_add_imm_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%x = load float, float addrspace(1)* %gep.0
@ -22,7 +22,7 @@ define void @commute_add_imm_fabs_f32(float addrspace(1)* %out, float addrspace(
; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, -4.0
; SI: buffer_store_dword [[REG]]
define void @commute_mul_imm_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
define amdgpu_kernel void @commute_mul_imm_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%x = load float, float addrspace(1)* %gep.0
@ -37,7 +37,7 @@ define void @commute_mul_imm_fneg_fabs_f32(float addrspace(1)* %out, float addrs
; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: v_mul_f32_e32 [[REG:v[0-9]+]], -4.0, [[X]]
; SI: buffer_store_dword [[REG]]
define void @commute_mul_imm_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
define amdgpu_kernel void @commute_mul_imm_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%x = load float, float addrspace(1)* %gep.0
@ -53,7 +53,7 @@ define void @commute_mul_imm_fneg_f32(float addrspace(1)* %out, float addrspace(
; SI: v_mov_b32_e32 [[K:v[0-9]+]], 0x44800000
; SI: v_add_f32_e64 [[REG:v[0-9]+]], [[K]], |[[X]]|
; SI: buffer_store_dword [[REG]]
define void @commute_add_lit_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
define amdgpu_kernel void @commute_add_lit_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%x = load float, float addrspace(1)* %gep.0
@ -68,7 +68,7 @@ define void @commute_add_lit_fabs_f32(float addrspace(1)* %out, float addrspace(
; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_add_f32_e64 [[REG:v[0-9]+]], [[X]], |[[Y]]|
; SI: buffer_store_dword [[REG]]
define void @commute_add_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
define amdgpu_kernel void @commute_add_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@ -85,7 +85,7 @@ define void @commute_add_fabs_f32(float addrspace(1)* %out, float addrspace(1)*
; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], [[X]], -[[Y]]
; SI: buffer_store_dword [[REG]]
define void @commute_mul_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
define amdgpu_kernel void @commute_mul_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@ -102,7 +102,7 @@ define void @commute_mul_fneg_f32(float addrspace(1)* %out, float addrspace(1)*
; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], [[X]], -|[[Y]]|
; SI: buffer_store_dword [[REG]]
define void @commute_mul_fabs_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
define amdgpu_kernel void @commute_mul_fabs_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@ -121,7 +121,7 @@ define void @commute_mul_fabs_fneg_f32(float addrspace(1)* %out, float addrspace
; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, |[[Y]]|
; SI: buffer_store_dword [[REG]]
define void @commute_mul_fabs_x_fabs_y_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
define amdgpu_kernel void @commute_mul_fabs_x_fabs_y_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@ -139,7 +139,7 @@ define void @commute_mul_fabs_x_fabs_y_f32(float addrspace(1)* %out, float addrs
; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, -|[[Y]]|
; SI: buffer_store_dword [[REG]]
define void @commute_mul_fabs_x_fneg_fabs_y_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
define amdgpu_kernel void @commute_mul_fabs_x_fneg_fabs_y_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@ -161,7 +161,7 @@ define void @commute_mul_fabs_x_fneg_fabs_y_f32(float addrspace(1)* %out, float
; SI-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, |[[R2]]|
; SI: buffer_store_dword [[RESULT]]
define void @fma_a_2.0_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
define amdgpu_kernel void @fma_a_2.0_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1


@ -8,7 +8,7 @@
; value if we want to ensure scratch memory is not being used.
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v1i32(<2 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
define amdgpu_kernel void @test_concat_v1i32(<2 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
%concat = shufflevector <1 x i32> %a, <1 x i32> %b, <2 x i32> <i32 0, i32 1>
store <2 x i32> %concat, <2 x i32> addrspace(1)* %out, align 8
ret void
@ -17,7 +17,7 @@ define void @test_concat_v1i32(<2 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x
; FUNC-LABEL: {{^}}test_concat_v2i32:
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v2i32(<4 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
define amdgpu_kernel void @test_concat_v2i32(<4 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
%concat = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x i32> %concat, <4 x i32> addrspace(1)* %out, align 16
ret void
@ -26,7 +26,7 @@ define void @test_concat_v2i32(<4 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x
; FUNC-LABEL: {{^}}test_concat_v4i32:
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v4i32(<8 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
define amdgpu_kernel void @test_concat_v4i32(<8 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
%concat = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
store <8 x i32> %concat, <8 x i32> addrspace(1)* %out, align 32
ret void
@ -35,7 +35,7 @@ define void @test_concat_v4i32(<8 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x
; FUNC-LABEL: {{^}}test_concat_v8i32:
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v8i32(<16 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) nounwind {
define amdgpu_kernel void @test_concat_v8i32(<16 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) nounwind {
%concat = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
store <16 x i32> %concat, <16 x i32> addrspace(1)* %out, align 64
ret void
@ -44,7 +44,7 @@ define void @test_concat_v8i32(<16 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x
; FUNC-LABEL: {{^}}test_concat_v16i32:
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v16i32(<32 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) nounwind {
define amdgpu_kernel void @test_concat_v16i32(<32 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) nounwind {
%concat = shufflevector <16 x i32> %a, <16 x i32> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
store <32 x i32> %concat, <32 x i32> addrspace(1)* %out, align 128
ret void
@ -53,7 +53,7 @@ define void @test_concat_v16i32(<32 x i32> addrspace(1)* %out, <16 x i32> %a, <1
; FUNC-LABEL: {{^}}test_concat_v1f32:
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v1f32(<2 x float> addrspace(1)* %out, <1 x float> %a, <1 x float> %b) nounwind {
define amdgpu_kernel void @test_concat_v1f32(<2 x float> addrspace(1)* %out, <1 x float> %a, <1 x float> %b) nounwind {
%concat = shufflevector <1 x float> %a, <1 x float> %b, <2 x i32> <i32 0, i32 1>
store <2 x float> %concat, <2 x float> addrspace(1)* %out, align 8
ret void
@ -62,7 +62,7 @@ define void @test_concat_v1f32(<2 x float> addrspace(1)* %out, <1 x float> %a, <
; FUNC-LABEL: {{^}}test_concat_v2f32:
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v2f32(<4 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind {
define amdgpu_kernel void @test_concat_v2f32(<4 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind {
%concat = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x float> %concat, <4 x float> addrspace(1)* %out, align 16
ret void
@ -71,7 +71,7 @@ define void @test_concat_v2f32(<4 x float> addrspace(1)* %out, <2 x float> %a, <
; FUNC-LABEL: {{^}}test_concat_v4f32:
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v4f32(<8 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind {
define amdgpu_kernel void @test_concat_v4f32(<8 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind {
%concat = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
store <8 x float> %concat, <8 x float> addrspace(1)* %out, align 32
ret void
@ -80,7 +80,7 @@ define void @test_concat_v4f32(<8 x float> addrspace(1)* %out, <4 x float> %a, <
; FUNC-LABEL: {{^}}test_concat_v8f32:
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v8f32(<16 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind {
define amdgpu_kernel void @test_concat_v8f32(<16 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind {
%concat = shufflevector <8 x float> %a, <8 x float> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
store <16 x float> %concat, <16 x float> addrspace(1)* %out, align 64
ret void
@ -89,7 +89,7 @@ define void @test_concat_v8f32(<16 x float> addrspace(1)* %out, <8 x float> %a,
; FUNC-LABEL: {{^}}test_concat_v16f32:
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v16f32(<32 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind {
define amdgpu_kernel void @test_concat_v16f32(<32 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind {
%concat = shufflevector <16 x float> %a, <16 x float> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
store <32 x float> %concat, <32 x float> addrspace(1)* %out, align 128
ret void
@ -98,7 +98,7 @@ define void @test_concat_v16f32(<32 x float> addrspace(1)* %out, <16 x float> %a
; FUNC-LABEL: {{^}}test_concat_v1i64:
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v1i64(<2 x double> addrspace(1)* %out, <1 x double> %a, <1 x double> %b) nounwind {
define amdgpu_kernel void @test_concat_v1i64(<2 x double> addrspace(1)* %out, <1 x double> %a, <1 x double> %b) nounwind {
%concat = shufflevector <1 x double> %a, <1 x double> %b, <2 x i32> <i32 0, i32 1>
store <2 x double> %concat, <2 x double> addrspace(1)* %out, align 16
ret void
@ -107,7 +107,7 @@ define void @test_concat_v1i64(<2 x double> addrspace(1)* %out, <1 x double> %a,
; FUNC-LABEL: {{^}}test_concat_v2i64:
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v2i64(<4 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) nounwind {
define amdgpu_kernel void @test_concat_v2i64(<4 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) nounwind {
%concat = shufflevector <2 x double> %a, <2 x double> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x double> %concat, <4 x double> addrspace(1)* %out, align 32
ret void
@ -116,7 +116,7 @@ define void @test_concat_v2i64(<4 x double> addrspace(1)* %out, <2 x double> %a,
; FUNC-LABEL: {{^}}test_concat_v4i64:
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v4i64(<8 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) nounwind {
define amdgpu_kernel void @test_concat_v4i64(<8 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) nounwind {
%concat = shufflevector <4 x double> %a, <4 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
store <8 x double> %concat, <8 x double> addrspace(1)* %out, align 64
ret void
@ -125,7 +125,7 @@ define void @test_concat_v4i64(<8 x double> addrspace(1)* %out, <4 x double> %a,
; FUNC-LABEL: {{^}}test_concat_v8i64:
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v8i64(<16 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) nounwind {
define amdgpu_kernel void @test_concat_v8i64(<16 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) nounwind {
%concat = shufflevector <8 x double> %a, <8 x double> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
store <16 x double> %concat, <16 x double> addrspace(1)* %out, align 128
ret void
@ -134,7 +134,7 @@ define void @test_concat_v8i64(<16 x double> addrspace(1)* %out, <8 x double> %a
; FUNC-LABEL: {{^}}test_concat_v16i64:
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v16i64(<32 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) nounwind {
define amdgpu_kernel void @test_concat_v16i64(<32 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) nounwind {
%concat = shufflevector <16 x double> %a, <16 x double> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
store <32 x double> %concat, <32 x double> addrspace(1)* %out, align 256
ret void
@ -143,7 +143,7 @@ define void @test_concat_v16i64(<32 x double> addrspace(1)* %out, <16 x double>
; FUNC-LABEL: {{^}}test_concat_v1f64:
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v1f64(<2 x double> addrspace(1)* %out, <1 x double> %a, <1 x double> %b) nounwind {
define amdgpu_kernel void @test_concat_v1f64(<2 x double> addrspace(1)* %out, <1 x double> %a, <1 x double> %b) nounwind {
%concat = shufflevector <1 x double> %a, <1 x double> %b, <2 x i32> <i32 0, i32 1>
store <2 x double> %concat, <2 x double> addrspace(1)* %out, align 16
ret void
@ -152,7 +152,7 @@ define void @test_concat_v1f64(<2 x double> addrspace(1)* %out, <1 x double> %a,
; FUNC-LABEL: {{^}}test_concat_v2f64:
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v2f64(<4 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) nounwind {
define amdgpu_kernel void @test_concat_v2f64(<4 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) nounwind {
%concat = shufflevector <2 x double> %a, <2 x double> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x double> %concat, <4 x double> addrspace(1)* %out, align 32
ret void
@ -161,7 +161,7 @@ define void @test_concat_v2f64(<4 x double> addrspace(1)* %out, <2 x double> %a,
; FUNC-LABEL: {{^}}test_concat_v4f64:
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v4f64(<8 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) nounwind {
define amdgpu_kernel void @test_concat_v4f64(<8 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) nounwind {
%concat = shufflevector <4 x double> %a, <4 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
store <8 x double> %concat, <8 x double> addrspace(1)* %out, align 64
ret void
@ -170,7 +170,7 @@ define void @test_concat_v4f64(<8 x double> addrspace(1)* %out, <4 x double> %a,
; FUNC-LABEL: {{^}}test_concat_v8f64:
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v8f64(<16 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) nounwind {
define amdgpu_kernel void @test_concat_v8f64(<16 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) nounwind {
%concat = shufflevector <8 x double> %a, <8 x double> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
store <16 x double> %concat, <16 x double> addrspace(1)* %out, align 128
ret void
@ -179,7 +179,7 @@ define void @test_concat_v8f64(<16 x double> addrspace(1)* %out, <8 x double> %a
; FUNC-LABEL: {{^}}test_concat_v16f64:
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v16f64(<32 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) nounwind {
define amdgpu_kernel void @test_concat_v16f64(<32 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) nounwind {
%concat = shufflevector <16 x double> %a, <16 x double> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
store <32 x double> %concat, <32 x double> addrspace(1)* %out, align 256
ret void
@ -188,7 +188,7 @@ define void @test_concat_v16f64(<32 x double> addrspace(1)* %out, <16 x double>
; FUNC-LABEL: {{^}}test_concat_v1i1:
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v1i1(<2 x i1> addrspace(1)* %out, <1 x i1> %a, <1 x i1> %b) nounwind {
define amdgpu_kernel void @test_concat_v1i1(<2 x i1> addrspace(1)* %out, <1 x i1> %a, <1 x i1> %b) nounwind {
%concat = shufflevector <1 x i1> %a, <1 x i1> %b, <2 x i32> <i32 0, i32 1>
store <2 x i1> %concat, <2 x i1> addrspace(1)* %out
ret void
@ -197,7 +197,7 @@ define void @test_concat_v1i1(<2 x i1> addrspace(1)* %out, <1 x i1> %a, <1 x i1>
; FUNC-LABEL: {{^}}test_concat_v2i1:
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v2i1(<4 x i1> addrspace(1)* %out, <2 x i1> %a, <2 x i1> %b) nounwind {
define amdgpu_kernel void @test_concat_v2i1(<4 x i1> addrspace(1)* %out, <2 x i1> %a, <2 x i1> %b) nounwind {
%concat = shufflevector <2 x i1> %a, <2 x i1> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x i1> %concat, <4 x i1> addrspace(1)* %out
ret void
@ -206,7 +206,7 @@ define void @test_concat_v2i1(<4 x i1> addrspace(1)* %out, <2 x i1> %a, <2 x i1>
; FUNC-LABEL: {{^}}test_concat_v4i1:
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v4i1(<8 x i1> addrspace(1)* %out, <4 x i1> %a, <4 x i1> %b) nounwind {
define amdgpu_kernel void @test_concat_v4i1(<8 x i1> addrspace(1)* %out, <4 x i1> %a, <4 x i1> %b) nounwind {
%concat = shufflevector <4 x i1> %a, <4 x i1> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
store <8 x i1> %concat, <8 x i1> addrspace(1)* %out
ret void
@ -215,7 +215,7 @@ define void @test_concat_v4i1(<8 x i1> addrspace(1)* %out, <4 x i1> %a, <4 x i1>
; FUNC-LABEL: {{^}}test_concat_v8i1:
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v8i1(<16 x i1> addrspace(1)* %out, <8 x i1> %a, <8 x i1> %b) nounwind {
define amdgpu_kernel void @test_concat_v8i1(<16 x i1> addrspace(1)* %out, <8 x i1> %a, <8 x i1> %b) nounwind {
%concat = shufflevector <8 x i1> %a, <8 x i1> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
store <16 x i1> %concat, <16 x i1> addrspace(1)* %out
ret void
@ -224,7 +224,7 @@ define void @test_concat_v8i1(<16 x i1> addrspace(1)* %out, <8 x i1> %a, <8 x i1
; FUNC-LABEL: {{^}}test_concat_v16i1:
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v16i1(<32 x i1> addrspace(1)* %out, <16 x i1> %a, <16 x i1> %b) nounwind {
define amdgpu_kernel void @test_concat_v16i1(<32 x i1> addrspace(1)* %out, <16 x i1> %a, <16 x i1> %b) nounwind {
%concat = shufflevector <16 x i1> %a, <16 x i1> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
store <32 x i1> %concat, <32 x i1> addrspace(1)* %out
ret void
@ -233,7 +233,7 @@ define void @test_concat_v16i1(<32 x i1> addrspace(1)* %out, <16 x i1> %a, <16 x
; FUNC-LABEL: {{^}}test_concat_v32i1:
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v32i1(<64 x i1> addrspace(1)* %out, <32 x i1> %a, <32 x i1> %b) nounwind {
define amdgpu_kernel void @test_concat_v32i1(<64 x i1> addrspace(1)* %out, <32 x i1> %a, <32 x i1> %b) nounwind {
%concat = shufflevector <32 x i1> %a, <32 x i1> %b, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
store <64 x i1> %concat, <64 x i1> addrspace(1)* %out
ret void
@ -242,7 +242,7 @@ define void @test_concat_v32i1(<64 x i1> addrspace(1)* %out, <32 x i1> %a, <32 x
; FUNC-LABEL: {{^}}test_concat_v1i16:
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v1i16(<2 x i16> addrspace(1)* %out, <1 x i16> %a, <1 x i16> %b) nounwind {
define amdgpu_kernel void @test_concat_v1i16(<2 x i16> addrspace(1)* %out, <1 x i16> %a, <1 x i16> %b) nounwind {
%concat = shufflevector <1 x i16> %a, <1 x i16> %b, <2 x i32> <i32 0, i32 1>
store <2 x i16> %concat, <2 x i16> addrspace(1)* %out, align 4
ret void
@ -251,7 +251,7 @@ define void @test_concat_v1i16(<2 x i16> addrspace(1)* %out, <1 x i16> %a, <1 x
; FUNC-LABEL: {{^}}test_concat_v2i16:
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v2i16(<4 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) nounwind {
define amdgpu_kernel void @test_concat_v2i16(<4 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) nounwind {
%concat = shufflevector <2 x i16> %a, <2 x i16> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x i16> %concat, <4 x i16> addrspace(1)* %out, align 8
ret void
@ -260,7 +260,7 @@ define void @test_concat_v2i16(<4 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x
; FUNC-LABEL: {{^}}test_concat_v4i16:
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v4i16(<8 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b) nounwind {
define amdgpu_kernel void @test_concat_v4i16(<8 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b) nounwind {
%concat = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
store <8 x i16> %concat, <8 x i16> addrspace(1)* %out, align 16
ret void
@ -269,7 +269,7 @@ define void @test_concat_v4i16(<8 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x
; FUNC-LABEL: {{^}}test_concat_v8i16:
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v8i16(<16 x i16> addrspace(1)* %out, <8 x i16> %a, <8 x i16> %b) nounwind {
define amdgpu_kernel void @test_concat_v8i16(<16 x i16> addrspace(1)* %out, <8 x i16> %a, <8 x i16> %b) nounwind {
%concat = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
store <16 x i16> %concat, <16 x i16> addrspace(1)* %out, align 32
ret void
@ -278,7 +278,7 @@ define void @test_concat_v8i16(<16 x i16> addrspace(1)* %out, <8 x i16> %a, <8 x
; FUNC-LABEL: {{^}}test_concat_v16i16:
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v16i16(<32 x i16> addrspace(1)* %out, <16 x i16> %a, <16 x i16> %b) nounwind {
define amdgpu_kernel void @test_concat_v16i16(<32 x i16> addrspace(1)* %out, <16 x i16> %a, <16 x i16> %b) nounwind {
%concat = shufflevector <16 x i16> %a, <16 x i16> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
store <32 x i16> %concat, <32 x i16> addrspace(1)* %out, align 64
ret void
@ -286,7 +286,7 @@ define void @test_concat_v16i16(<32 x i16> addrspace(1)* %out, <16 x i16> %a, <1
; FUNC-LABEL: {{^}}concat_vector_crash:
; SI: s_endpgm
define void @concat_vector_crash(<8 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
define amdgpu_kernel void @concat_vector_crash(<8 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
bb:
%tmp = load <2 x float>, <2 x float> addrspace(1)* %in, align 4
%tmp1 = shufflevector <2 x float> %tmp, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
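
A hedged aside on the shufflevector idiom every test in this file leans on: mask indices below the first operand's width select from %a, indices at or above it select from %b (so <i32 0, i32 1, i32 2, i32 3> over two <2 x i32> inputs is a plain concat), and an undef mask lane, as in the crash test above, leaves that result lane unconstrained. A minimal sketch, with a hypothetical function name:

define amdgpu_kernel void @mask_sketch(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) {
  ; <i32 1, i32 2> picks lane 1 of %a and lane 0 of %b
  %mix = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 2>
  store <2 x i32> %mix, <2 x i32> addrspace(1)* %out, align 8
  ret void
}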


@ -1,12 +1,12 @@
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=hawaii -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination -o - %s | FileCheck -check-prefix=GCN %s
--- |
define void @s_fold_and_imm_regimm_32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
define amdgpu_kernel void @s_fold_and_imm_regimm_32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
%and = and i32 %a, 1234567
store volatile i32 %and, i32 addrspace(1)* %out
ret void
}
define void @v_fold_and_imm_regimm_32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #0 {
define amdgpu_kernel void @v_fold_and_imm_regimm_32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%idxprom = sext i32 %tid to i64
%gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i64 %idxprom
@ -17,13 +17,13 @@
ret void
}
define void @s_fold_shl_imm_regimm_32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
define amdgpu_kernel void @s_fold_shl_imm_regimm_32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
%shl = shl i32 %a, 12
store volatile i32 %shl, i32 addrspace(1)* %out
ret void
}
define void @v_fold_shl_imm_regimm_32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #0 {
define amdgpu_kernel void @v_fold_shl_imm_regimm_32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%idxprom = sext i32 %tid to i64
%gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i64 %idxprom
@ -34,13 +34,13 @@
ret void
}
define void @s_fold_ashr_imm_regimm_32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
define amdgpu_kernel void @s_fold_ashr_imm_regimm_32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
%ashr = ashr i32 %a, 12
store volatile i32 %ashr, i32 addrspace(1)* %out
ret void
}
define void @v_fold_ashr_imm_regimm_32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #0 {
define amdgpu_kernel void @v_fold_ashr_imm_regimm_32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%idxprom = sext i32 %tid to i64
%gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i64 %idxprom
@ -51,13 +51,13 @@
ret void
}
define void @s_fold_lshr_imm_regimm_32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
define amdgpu_kernel void @s_fold_lshr_imm_regimm_32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
%lshr = lshr i32 %a, 12
store volatile i32 %lshr, i32 addrspace(1)* %out
ret void
}
define void @v_fold_lshr_imm_regimm_32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #0 {
define amdgpu_kernel void @v_fold_lshr_imm_regimm_32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%idxprom = sext i32 %tid to i64
%gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i64 %idxprom
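
The file above is a MIR rather than an IR test: everything after the "--- |" marker is an embedded LLVM IR module, which is why plain define lines appear inside a .mir test at all, and separate "---"/"name:" documents below this excerpt carry the machine bodies that -run-pass si-fold-operands,dead-mi-elimination rewrites. A minimal skeleton of that layout, as a sketch only (the function is hypothetical and the MIR syntax is assumed to match this tree):

# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=hawaii -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination -o - %s | FileCheck %s
# CHECK: S_ENDPGM
--- |
  define amdgpu_kernel void @sketch() {
    ret void
  }
...
---
name:            sketch
body:             |
  bb.0:
    S_ENDPGM
...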


@ -5,7 +5,7 @@
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define void @fold_mi_v_and_0(i32 addrspace(1)* %out) {
define amdgpu_kernel void @fold_mi_v_and_0(i32 addrspace(1)* %out) {
%x = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%size = call i32 @llvm.amdgcn.groupstaticsize()
%and = and i32 %size, %x
@ -17,7 +17,7 @@ define void @fold_mi_v_and_0(i32 addrspace(1)* %out) {
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define void @fold_mi_s_and_0(i32 addrspace(1)* %out, i32 %x) #0 {
define amdgpu_kernel void @fold_mi_s_and_0(i32 addrspace(1)* %out, i32 %x) #0 {
%size = call i32 @llvm.amdgcn.groupstaticsize()
%and = and i32 %size, %x
store i32 %and, i32 addrspace(1)* %out
@ -28,7 +28,7 @@ define void @fold_mi_s_and_0(i32 addrspace(1)* %out, i32 %x) #0 {
; GCN: v_mbcnt_lo_u32_b32_e64 [[RESULT:v[0-9]+]]
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define void @fold_mi_v_or_0(i32 addrspace(1)* %out) {
define amdgpu_kernel void @fold_mi_v_or_0(i32 addrspace(1)* %out) {
%x = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%size = call i32 @llvm.amdgcn.groupstaticsize()
%or = or i32 %size, %x
@ -42,7 +42,7 @@ define void @fold_mi_v_or_0(i32 addrspace(1)* %out) {
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[SVAL]]
; GCN-NOT: [[VVAL]]
; GCN: buffer_store_dword [[VVAL]]
define void @fold_mi_s_or_0(i32 addrspace(1)* %out, i32 %x) #0 {
define amdgpu_kernel void @fold_mi_s_or_0(i32 addrspace(1)* %out, i32 %x) #0 {
%size = call i32 @llvm.amdgcn.groupstaticsize()
%or = or i32 %size, %x
store i32 %or, i32 addrspace(1)* %out
@ -53,7 +53,7 @@ define void @fold_mi_s_or_0(i32 addrspace(1)* %out, i32 %x) #0 {
; GCN: v_mbcnt_lo_u32_b32_e64 [[RESULT:v[0-9]+]]
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define void @fold_mi_v_xor_0(i32 addrspace(1)* %out) {
define amdgpu_kernel void @fold_mi_v_xor_0(i32 addrspace(1)* %out) {
%x = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%size = call i32 @llvm.amdgcn.groupstaticsize()
%xor = xor i32 %size, %x
@ -67,7 +67,7 @@ define void @fold_mi_v_xor_0(i32 addrspace(1)* %out) {
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[SVAL]]
; GCN-NOT: [[VVAL]]
; GCN: buffer_store_dword [[VVAL]]
define void @fold_mi_s_xor_0(i32 addrspace(1)* %out, i32 %x) #0 {
define amdgpu_kernel void @fold_mi_s_xor_0(i32 addrspace(1)* %out, i32 %x) #0 {
%size = call i32 @llvm.amdgcn.groupstaticsize()
%xor = xor i32 %size, %x
store i32 %xor, i32 addrspace(1)* %out
@ -78,7 +78,7 @@ define void @fold_mi_s_xor_0(i32 addrspace(1)* %out, i32 %x) #0 {
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], -1{{$}}
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define void @fold_mi_s_not_0(i32 addrspace(1)* %out, i32 %x) #0 {
define amdgpu_kernel void @fold_mi_s_not_0(i32 addrspace(1)* %out, i32 %x) #0 {
%size = call i32 @llvm.amdgcn.groupstaticsize()
%xor = xor i32 %size, -1
store i32 %xor, i32 addrspace(1)* %out
@ -91,7 +91,7 @@ define void @fold_mi_s_not_0(i32 addrspace(1)* %out, i32 %x) #0 {
; GCN-NEXT: v_not_b32_e32 v[[RESULT_LO]]
; GCN-NEXT: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], -1{{$}}
; GCN-NEXT: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
define void @fold_mi_v_not_0(i64 addrspace(1)* %out) {
define amdgpu_kernel void @fold_mi_v_not_0(i64 addrspace(1)* %out) {
%vreg = load volatile i64, i64 addrspace(1)* undef
%ctpop = call i64 @llvm.ctpop.i64(i64 %vreg)
%xor = xor i64 %ctpop, -1
@ -110,7 +110,7 @@ define void @fold_mi_v_not_0(i64 addrspace(1)* %out) {
; GCN-DAG: v_or_b32_e32 v[[RESULT_LO]], v[[VREG1_LO]], v[[RESULT_LO]]
; GCN-DAG: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], v[[VREG1_HI]]
; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
define void @fold_mi_or_neg1(i64 addrspace(1)* %out) {
define amdgpu_kernel void @fold_mi_or_neg1(i64 addrspace(1)* %out) {
%vreg0 = load volatile i64, i64 addrspace(1)* undef
%vreg1 = load volatile i64, i64 addrspace(1)* undef
%ctpop = call i64 @llvm.ctpop.i64(i64 %vreg0)
@ -126,7 +126,7 @@ define void @fold_mi_or_neg1(i64 addrspace(1)* %out) {
; GCN: v_not_b32
; GCN: v_and_b32
; GCN-NOT: v_and_b32
define void @fold_mi_and_neg1(i64 addrspace(1)* %out) {
define amdgpu_kernel void @fold_mi_and_neg1(i64 addrspace(1)* %out) {
%vreg0 = load volatile i64, i64 addrspace(1)* undef
%vreg1 = load volatile i64, i64 addrspace(1)* undef
%ctpop = call i64 @llvm.ctpop.i64(i64 %vreg0)


@ -72,7 +72,7 @@
; GCN: buffer_load_dword [[RELOAD_VAL:v[0-9]+]], off, s[0:3], s7 offset:[[VAL_OFFSET]] ; 4-byte Folded Reload
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RELOAD_VAL]]
define void @divergent_if_endif(i32 addrspace(1)* %out) #0 {
define amdgpu_kernel void @divergent_if_endif(i32 addrspace(1)* %out) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%load0 = load volatile i32, i32 addrspace(3)* undef
@ -150,7 +150,7 @@ endif:
; GCN: buffer_load_dword v[[VAL_END:[0-9]+]], off, s[0:3], s7 offset:[[VAL_SUB_OFFSET]] ; 4-byte Folded Reload
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[VAL_END]]
define void @divergent_loop(i32 addrspace(1)* %out) #0 {
define amdgpu_kernel void @divergent_loop(i32 addrspace(1)* %out) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%load0 = load volatile i32, i32 addrspace(3)* undef
@ -272,7 +272,7 @@ end:
; GCN: buffer_load_dword v[[RESULT:[0-9]+]], off, s[0:3], s7 offset:[[RESULT_OFFSET]] ; 4-byte Folded Reload
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RESULT]]
define void @divergent_if_else_endif(i32 addrspace(1)* %out) #0 {
define amdgpu_kernel void @divergent_if_else_endif(i32 addrspace(1)* %out) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%load0 = load volatile i32, i32 addrspace(3)* undef


@ -6,7 +6,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0
; GCN: v_cmp_ne_u32_e64
; GCN: ; mask branch
; GCN: BB{{[0-9]+_[0-9]+}}:
define void @convergent_inlineasm(i64 addrspace(1)* nocapture %arg) {
define amdgpu_kernel void @convergent_inlineasm(i64 addrspace(1)* nocapture %arg) {
bb:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = tail call i64 asm "v_cmp_ne_u32_e64 $0, 0, $1", "=s,v"(i32 1) #1
@ -30,7 +30,7 @@ bb5: ; preds = %bb3, %bb
; GCN: BB{{[0-9]+_[0-9]+}}:
define void @nonconvergent_inlineasm(i64 addrspace(1)* nocapture %arg) {
define amdgpu_kernel void @nonconvergent_inlineasm(i64 addrspace(1)* nocapture %arg) {
bb:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = tail call i64 asm "v_cmp_ne_u32_e64 $0, 0, $1", "=s,v"(i32 1)
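
The two tests above differ only in whether the inline asm is convergent: a convergent call must not be moved to a point where a different set of threads would execute it, so the first test expects the v_cmp before the mask branch, while the non-convergent copy is free to sink past the branch into the conditional block. The attribute group #1 lives outside these hunks and presumably carries convergent; a call-site sketch of the same constraint, with a hypothetical kernel name:

define amdgpu_kernel void @convergent_sketch(i64 addrspace(1)* %out) {
  ; convergent pins this asm to the thread set of the enclosing block
  %cmp = tail call i64 asm "v_cmp_ne_u32_e64 $0, 0, $1", "=s,v"(i32 1) convergent
  store i64 %cmp, i64 addrspace(1)* %out
  ret void
}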


@ -8,7 +8,7 @@ declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
; GCN: buffer_load_dword [[REG:v[0-9]+]]
; GCN: buffer_store_dword [[REG]]
; GCN: s_endpgm
define void @test_copy_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
define amdgpu_kernel void @test_copy_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4
ret void
@ -19,7 +19,7 @@ define void @test_copy_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)*
; GCN: buffer_store_dword [[REG]]
; GCN: buffer_store_dword [[REG]]
; GCN: s_endpgm
define void @test_copy_v4i8_x2(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %in) nounwind {
define amdgpu_kernel void @test_copy_v4i8_x2(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out1, align 4
@ -32,7 +32,7 @@ define void @test_copy_v4i8_x2(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(
; GCN: buffer_store_dword [[REG]]
; GCN: buffer_store_dword [[REG]]
; GCN: s_endpgm
define void @test_copy_v4i8_x3(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %in) nounwind {
define amdgpu_kernel void @test_copy_v4i8_x3(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out1, align 4
@ -47,7 +47,7 @@ define void @test_copy_v4i8_x3(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(
; GCN: buffer_store_dword [[REG]]
; GCN: buffer_store_dword [[REG]]
; GCN: s_endpgm
define void @test_copy_v4i8_x4(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %out3, <4 x i8> addrspace(1)* %in) nounwind {
define amdgpu_kernel void @test_copy_v4i8_x4(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %out3, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out1, align 4
@ -65,7 +65,7 @@ define void @test_copy_v4i8_x4(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(
; GCN-DAG: buffer_store_dword
; GCN: s_endpgm
define void @test_copy_v4i8_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %in) nounwind {
define amdgpu_kernel void @test_copy_v4i8_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
%add = add <4 x i8> %val, <i8 9, i8 9, i8 9, i8 9>
store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
@ -85,7 +85,7 @@ define void @test_copy_v4i8_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> add
; GCN: {{buffer|flat}}_store_dword
; GCN: {{buffer|flat}}_store_dword
; GCN: s_endpgm
define void @test_copy_v4i8_x2_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %in) nounwind {
define amdgpu_kernel void @test_copy_v4i8_x2_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %in) nounwind {
%tid.x = call i32 @llvm.amdgcn.workitem.id.x()
%in.ptr = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid.x
%val = load <4 x i8>, <4 x i8> addrspace(1)* %in.ptr, align 4
@ -101,7 +101,7 @@ define void @test_copy_v4i8_x2_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8>
; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
; GCN: s_endpgm
define void @test_copy_v3i8_align4(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) nounwind {
define amdgpu_kernel void @test_copy_v3i8_align4(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) nounwind {
%val = load <3 x i8>, <3 x i8> addrspace(1)* %in, align 4
store <3 x i8> %val, <3 x i8> addrspace(1)* %out, align 4
ret void
@ -113,7 +113,7 @@ define void @test_copy_v3i8_align4(<3 x i8> addrspace(1)* %out, <3 x i8> addrspa
; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
; GCN: s_endpgm
define void @test_copy_v3i8_align2(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) nounwind {
define amdgpu_kernel void @test_copy_v3i8_align2(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) nounwind {
%val = load <3 x i8>, <3 x i8> addrspace(1)* %in, align 2
store <3 x i8> %val, <3 x i8> addrspace(1)* %out, align 2
ret void
@ -128,7 +128,7 @@ define void @test_copy_v3i8_align2(<3 x i8> addrspace(1)* %out, <3 x i8> addrspa
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: s_endpgm
define void @test_copy_v3i8_align1(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) nounwind {
define amdgpu_kernel void @test_copy_v3i8_align1(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) nounwind {
%val = load <3 x i8>, <3 x i8> addrspace(1)* %in, align 1
store <3 x i8> %val, <3 x i8> addrspace(1)* %out, align 1
ret void
@ -141,7 +141,7 @@ define void @test_copy_v3i8_align1(<3 x i8> addrspace(1)* %out, <3 x i8> addrspa
; GCN: buffer_load_ubyte
; GCN: buffer_store_dword
; GCN: s_endpgm
define void @test_copy_v4i8_volatile_load(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
define amdgpu_kernel void @test_copy_v4i8_volatile_load(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
%val = load volatile <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4
ret void
@ -157,7 +157,7 @@ define void @test_copy_v4i8_volatile_load(<4 x i8> addrspace(1)* %out, <4 x i8>
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: s_endpgm
define void @test_copy_v4i8_volatile_store(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
define amdgpu_kernel void @test_copy_v4i8_volatile_store(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
store volatile <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4
ret void


@ -6,7 +6,7 @@
; Make sure this doesn't crash
; CHECK-LABEL: {{^}}copy_to_reg_frameindex:
define void @copy_to_reg_frameindex(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
define amdgpu_kernel void @copy_to_reg_frameindex(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
entry:
%alloca = alloca [16 x i32]
br label %loop


@ -27,7 +27,7 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; EG: FFBH_UINT
; EG: CNDE_INT
define void @s_ctlz_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
define amdgpu_kernel void @s_ctlz_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
store i32 %ctlz, i32 addrspace(1)* %out, align 4
ret void
@ -43,7 +43,7 @@ define void @s_ctlz_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
; EG: FFBH_UINT
; EG: CNDE_INT
define void @v_ctlz_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
define amdgpu_kernel void @v_ctlz_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32, i32 addrspace(1)* %valptr, align 4
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
store i32 %ctlz, i32 addrspace(1)* %out, align 4
@ -61,7 +61,7 @@ define void @v_ctlz_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalia
; EG: CNDE_INT
; EG: FFBH_UINT
; EG: CNDE_INT
define void @v_ctlz_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
define amdgpu_kernel void @v_ctlz_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
%val = load <2 x i32>, <2 x i32> addrspace(1)* %valptr, align 8
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %val, i1 false) nounwind readnone
store <2 x i32> %ctlz, <2 x i32> addrspace(1)* %out, align 8
@ -89,7 +89,7 @@ define void @v_ctlz_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrsp
; EG-DAG: FFBH_UINT
; EG-DAG: CNDE_INT
define void @v_ctlz_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
define amdgpu_kernel void @v_ctlz_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
%val = load <4 x i32>, <4 x i32> addrspace(1)* %valptr, align 16
%ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val, i1 false) nounwind readnone
store <4 x i32> %ctlz, <4 x i32> addrspace(1)* %out, align 16
@ -101,7 +101,7 @@ define void @v_ctlz_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrsp
; GCN-DAG: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; GCN: buffer_store_byte [[RESULT]],
; GCN: s_endpgm
define void @v_ctlz_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
define amdgpu_kernel void @v_ctlz_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
%val = load i8, i8 addrspace(1)* %valptr
%ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 false) nounwind readnone
store i8 %ctlz, i8 addrspace(1)* %out
@ -119,14 +119,14 @@ define void @v_ctlz_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %
; GCN-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[VFFBH_HI]], [[VFFBH_LO]]
; GCN-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}}
; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CTLZ]]:[[CTLZ_HI]]{{\]}}
define void @s_ctlz_i64(i64 addrspace(1)* noalias %out, i64 %val) nounwind {
define amdgpu_kernel void @s_ctlz_i64(i64 addrspace(1)* noalias %out, i64 %val) nounwind {
%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false)
store i64 %ctlz, i64 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}s_ctlz_i64_trunc:
define void @s_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
define amdgpu_kernel void @s_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false)
%trunc = trunc i64 %ctlz to i32
store i32 %trunc, i32 addrspace(1)* %out
@ -145,7 +145,7 @@ define void @s_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 %val) nounwind
; GCN-DAG: v_cmp_ne_u32_e32 vcc, 0, [[OR]]
; GCN-DAG: v_cndmask_b32_e32 v[[CLTZ_LO:[0-9]+]], 64, v[[CTLZ:[0-9]+]], vcc
; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CLTZ_LO]]:[[CTLZ_HI]]{{\]}}
define void @v_ctlz_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
define amdgpu_kernel void @v_ctlz_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
@ -156,7 +156,7 @@ define void @v_ctlz_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalia
}
; FUNC-LABEL: {{^}}v_ctlz_i64_trunc:
define void @v_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
define amdgpu_kernel void @v_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -172,7 +172,7 @@ define void @v_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 addrspace(1)*
; GCN: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define void @v_ctlz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
define amdgpu_kernel void @v_ctlz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32, i32 addrspace(1)* %valptr
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
%cmp = icmp eq i32 %val, 0
@ -186,7 +186,7 @@ define void @v_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 addrspace(1)*
; GCN: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define void @v_ctlz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
define amdgpu_kernel void @v_ctlz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32, i32 addrspace(1)* %valptr
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
%cmp = icmp ne i32 %val, 0
@ -202,7 +202,7 @@ define void @v_ctlz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspac
; GCN: v_cmp
; GCN: v_cndmask
; GCN: s_endpgm
define void @v_ctlz_i32_sel_eq_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
define amdgpu_kernel void @v_ctlz_i32_sel_eq_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32, i32 addrspace(1)* %valptr
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
%cmp = icmp eq i32 %ctlz, 32
@ -217,7 +217,7 @@ define void @v_ctlz_i32_sel_eq_bitwidth(i32 addrspace(1)* noalias %out, i32 addr
; GCN: v_cmp
; GCN: v_cndmask
; GCN: s_endpgm
define void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32, i32 addrspace(1)* %valptr
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
%cmp = icmp ne i32 %ctlz, 32
@ -230,7 +230,7 @@ define void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias %out, i32 addr
; GCN: {{buffer|flat}}_load_ubyte [[VAL:v[0-9]+]],
; GCN: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
; GCN: {{buffer|flat}}_store_byte [[FFBH]],
define void @v_ctlz_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
define amdgpu_kernel void @v_ctlz_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
%valptr.gep = getelementptr i8, i8 addrspace(1)* %valptr, i32 %tid
%val = load i8, i8 addrspace(1)* %valptr.gep
@ -245,7 +245,7 @@ define void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias %out, i32 addr
; SI: buffer_load_ushort [[VAL:v[0-9]+]],
; SI: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
; SI: buffer_store_short [[FFBH]],
define void @v_ctlz_i16_sel_eq_neg1(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %valptr) nounwind {
define amdgpu_kernel void @v_ctlz_i16_sel_eq_neg1(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %valptr) nounwind {
%val = load i16, i16 addrspace(1)* %valptr
%ctlz = call i16 @llvm.ctlz.i16(i16 %val, i1 false) nounwind readnone
%cmp = icmp eq i16 %val, 0
@ -260,7 +260,7 @@ define void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias %out, i32 addr
; GCN: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
; GCN: v_and_b32_e32 [[TRUNC:v[0-9]+]], 0x7f, [[FFBH]]
; GCN: {{buffer|flat}}_store_byte [[TRUNC]],
define void @v_ctlz_i7_sel_eq_neg1(i7 addrspace(1)* noalias %out, i7 addrspace(1)* noalias %valptr) nounwind {
define amdgpu_kernel void @v_ctlz_i7_sel_eq_neg1(i7 addrspace(1)* noalias %out, i7 addrspace(1)* noalias %valptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
%valptr.gep = getelementptr i7, i7 addrspace(1)* %valptr, i32 %tid
%val = load i7, i7 addrspace(1)* %valptr.gep
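
This file and the next exercise the two values of llvm.ctlz's second operand. With i1 false the result is fully defined (32 for a zero i32 input), which is why the checks above wrap FFBH_UINT/v_ffbh_u32 in a CNDE_INT/cndmask select; with i1 true a zero input gives an undefined result, so the bare ffbh suffices. A minimal sketch contrasting the two forms (the kernel name is hypothetical):

declare i32 @llvm.ctlz.i32(i32, i1)

define amdgpu_kernel void @ctlz_flag_sketch(i32 addrspace(1)* %out, i32 %x) {
  %defined = call i32 @llvm.ctlz.i32(i32 %x, i1 false)   ; 32 when %x is 0, needs a select
  %zero_undef = call i32 @llvm.ctlz.i32(i32 %x, i1 true) ; unspecified when %x is 0, bare ffbh
  %sum = add i32 %defined, %zero_undef
  store i32 %sum, i32 addrspace(1)* %out
  ret void
}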


@ -22,7 +22,7 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; GCN: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
define void @s_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
define amdgpu_kernel void @s_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
store i32 %ctlz, i32 addrspace(1)* %out, align 4
ret void
@ -35,7 +35,7 @@ define void @s_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nou
; GCN: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
define void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
define amdgpu_kernel void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32, i32 addrspace(1)* %valptr, align 4
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
store i32 %ctlz, i32 addrspace(1)* %out, align 4
@ -51,7 +51,7 @@ define void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
define void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
define amdgpu_kernel void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
%val = load <2 x i32>, <2 x i32> addrspace(1)* %valptr, align 8
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %val, i1 true) nounwind readnone
store <2 x i32> %ctlz, <2 x i32> addrspace(1)* %out, align 8
@ -71,7 +71,7 @@ define void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
define void @v_ctlz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
define amdgpu_kernel void @v_ctlz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
%val = load <4 x i32>, <4 x i32> addrspace(1)* %valptr, align 16
%ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val, i1 true) nounwind readnone
store <4 x i32> %ctlz, <4 x i32> addrspace(1)* %out, align 16
@ -82,7 +82,7 @@ define void @v_ctlz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x
; GCN: buffer_load_ubyte [[VAL:v[0-9]+]],
; GCN: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; GCN: buffer_store_byte [[RESULT]],
define void @v_ctlz_zero_undef_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
define amdgpu_kernel void @v_ctlz_zero_undef_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
%val = load i8, i8 addrspace(1)* %valptr
%ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 true) nounwind readnone
store i8 %ctlz, i8 addrspace(1)* %out
@ -100,14 +100,14 @@ define void @v_ctlz_zero_undef_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)
; GCN-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[VFFBH_HI]], [[VFFBH_LO]]
; GCN-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}}
; GCN: {{buffer|flat}}_store_dwordx2 v{{\[}}[[CTLZ]]:[[CTLZ_HI]]{{\]}}
define void @s_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out, i64 %val) nounwind {
define amdgpu_kernel void @s_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out, i64 %val) nounwind {
%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 true)
store i64 %ctlz, i64 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}s_ctlz_zero_undef_i64_trunc:
define void @s_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
define amdgpu_kernel void @s_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 true)
%trunc = trunc i64 %ctlz to i32
store i32 %trunc, i32 addrspace(1)* %out
@ -123,7 +123,7 @@ define void @s_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias %out, i64 %va
; GCN-DAG: v_cndmask_b32_e64 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[FFBH_LO]]
; GCN-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}}
; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CTLZ]]:[[CTLZ_HI]]{{\]}}
define void @v_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
define amdgpu_kernel void @v_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
@ -134,7 +134,7 @@ define void @v_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out, i64 addrspace
}
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i64_trunc:
define void @v_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
define amdgpu_kernel void @v_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -149,7 +149,7 @@ define void @v_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias %out, i64 add
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
; GCN: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[RESULT]],
define void @v_ctlz_zero_undef_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32, i32 addrspace(1)* %valptr
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
%cmp = icmp eq i32 %val, 0
@ -162,7 +162,7 @@ define void @v_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias %out, i64 add
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
; GCN: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[RESULT]],
define void @v_ctlz_zero_undef_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32, i32 addrspace(1)* %valptr
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
%cmp = icmp ne i32 %val, 0
@ -175,7 +175,7 @@ define void @v_ctlz_zero_undef_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i
; GCN: {{buffer|flat}}_load_ubyte [[VAL:v[0-9]+]],
; GCN: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
; GCN: {{buffer|flat}}_store_byte [[FFBH]],
define void @v_ctlz_zero_undef_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
define amdgpu_kernel void @v_ctlz_zero_undef_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
%valptr.gep = getelementptr i8, i8 addrspace(1)* %valptr, i32 %tid
%val = load i8, i8 addrspace(1)* %valptr.gep
@ -194,7 +194,7 @@ define void @v_ctlz_zero_undef_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8
; GCN-DAG: buffer_store_dword [[RESULT0]]
; GCN-DAG: buffer_store_byte [[RESULT1]]
; GCN: s_endpgm
define void @v_ctlz_zero_undef_i32_sel_eq_neg1_two_use(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1_two_use(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32, i32 addrspace(1)* %valptr
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
%cmp = icmp eq i32 %val, 0
@ -211,7 +211,7 @@ define void @v_ctlz_zero_undef_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8
; GCN: v_cmp
; GCN: v_cndmask
; GCN: buffer_store_dword
define void @v_ctlz_zero_undef_i32_sel_eq_0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32, i32 addrspace(1)* %valptr
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
%cmp = icmp eq i32 %val, 0
@ -227,7 +227,7 @@ define void @v_ctlz_zero_undef_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8
; GCN: v_cmp
; GCN: v_cndmask
; GCN: buffer_store_dword
define void @v_ctlz_zero_undef_i32_sel_ne_0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32, i32 addrspace(1)* %valptr
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
%cmp = icmp ne i32 %val, 0
@ -243,7 +243,7 @@ define void @v_ctlz_zero_undef_i32_sel_ne_0(i32 addrspace(1)* noalias %out, i32
; GCN: v_cmp
; GCN: v_cndmask
; GCN: buffer_store_dword
define void @v_ctlz_zero_undef_i32_sel_eq_cmp_non0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_cmp_non0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32, i32 addrspace(1)* %valptr
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
%cmp = icmp eq i32 %val, 1
@ -259,7 +259,7 @@ define void @v_ctlz_zero_undef_i32_sel_ne_0(i32 addrspace(1)* noalias %out, i32
; GCN: v_cmp
; GCN: v_cndmask
; GCN: buffer_store_dword
define void @v_ctlz_zero_undef_i32_sel_ne_cmp_non0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_cmp_non0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32, i32 addrspace(1)* %valptr
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
%cmp = icmp ne i32 %val, 1


@ -16,7 +16,7 @@ declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) nounwind readnone
; GCN: s_endpgm
; EG: BCNT_INT
define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
define amdgpu_kernel void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
store i32 %ctpop, i32 addrspace(1)* %out, align 4
ret void
@ -30,7 +30,7 @@ define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
; GCN: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
define amdgpu_kernel void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%val = load i32, i32 addrspace(1)* %in, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
store i32 %ctpop, i32 addrspace(1)* %out, align 4
@ -48,7 +48,7 @@ define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noali
; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1) nounwind {
define amdgpu_kernel void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1) nounwind {
%val0 = load i32, i32 addrspace(1)* %in0, align 4
%val1 = load i32, i32 addrspace(1)* %in1, align 4
%ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
@ -64,7 +64,7 @@ define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace
; GCN-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1, i32 %sval) nounwind {
define amdgpu_kernel void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1, i32 %sval) nounwind {
%val0 = load i32, i32 addrspace(1)* %in0, align 4
%ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
%add = add i32 %ctpop0, %sval
@ -79,7 +79,7 @@ define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(
; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) nounwind {
define amdgpu_kernel void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) nounwind {
%val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8
%ctpop = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %val) nounwind readnone
store <2 x i32> %ctpop, <2 x i32> addrspace(1)* %out, align 8
@ -97,7 +97,7 @@ define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrs
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %in) nounwind {
define amdgpu_kernel void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %in) nounwind {
%val = load <4 x i32>, <4 x i32> addrspace(1)* %in, align 16
%ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val) nounwind readnone
store <4 x i32> %ctpop, <4 x i32> addrspace(1)* %out, align 16
@ -123,7 +123,7 @@ define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrs
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrspace(1)* noalias %in) nounwind {
define amdgpu_kernel void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrspace(1)* noalias %in) nounwind {
%val = load <8 x i32>, <8 x i32> addrspace(1)* %in, align 32
%ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %val) nounwind readnone
store <8 x i32> %ctpop, <8 x i32> addrspace(1)* %out, align 32
@ -165,7 +165,7 @@ define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrs
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> addrspace(1)* noalias %in) nounwind {
define amdgpu_kernel void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> addrspace(1)* noalias %in) nounwind {
%val = load <16 x i32>, <16 x i32> addrspace(1)* %in, align 32
%ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %val) nounwind readnone
store <16 x i32> %ctpop, <16 x i32> addrspace(1)* %out, align 32
@ -179,7 +179,7 @@ define void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> ad
; GCN: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
define amdgpu_kernel void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%val = load i32, i32 addrspace(1)* %in, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
%add = add i32 %ctpop, 4
@ -194,7 +194,7 @@ define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32
; GCN: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
define amdgpu_kernel void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%val = load i32, i32 addrspace(1)* %in, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
%add = add i32 4, %ctpop
@ -209,7 +209,7 @@ define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out,
; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
define amdgpu_kernel void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%val = load i32, i32 addrspace(1)* %in, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
%add = add i32 %ctpop, 99999
@ -225,7 +225,7 @@ define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspa
; GCN: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
define amdgpu_kernel void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
%val = load i32, i32 addrspace(1)* %in, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
%add = add i32 %ctpop, %const
@ -241,7 +241,7 @@ define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1
; GCN: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
define amdgpu_kernel void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
%val = load i32, i32 addrspace(1)* %in, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
%add = add i32 %const, %ctpop
@ -258,7 +258,7 @@ define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspa
; GCN: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind {
define amdgpu_kernel void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind {
%val = load i32, i32 addrspace(1)* %in, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
%gep = getelementptr i32, i32 addrspace(1)* %constptr, i32 4
@ -279,7 +279,7 @@ define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrsp
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
; EG: BCNT_INT
define void @ctpop_i32_in_br(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %ctpop_arg, i32 %cond) {
define amdgpu_kernel void @ctpop_i32_in_br(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %ctpop_arg, i32 %cond) {
entry:
%tmp0 = icmp eq i32 %cond, 0
br i1 %tmp0, label %if, label %else

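The file above checks that a population count of a uniform value selects the scalar s_bcnt1_i32_b32, that a count of a loaded (divergent) value selects v_bcnt_u32_b32, and that a following add is folded into the bcnt's second source operand. A minimal sketch of the folded shape, written here for illustration rather than taken from the test file:

declare i32 @llvm.ctpop.i32(i32) nounwind readnone

; Sketch only: the add of an inline constant is expected to fold into the
; second source of v_bcnt_u32_b32 instead of emitting a separate v_add.
define amdgpu_kernel void @ctpop_fold_sketch(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %val = load i32, i32 addrspace(1)* %in, align 4
  %pop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  %sum = add i32 %pop, 4
  store i32 %sum, i32 addrspace(1)* %out, align 4
  ret void
}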

@ -17,7 +17,7 @@ declare i128 @llvm.ctpop.i128(i128) nounwind readnone
; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; GCN: buffer_store_dword [[VRESULT]],
; GCN: s_endpgm
define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
define amdgpu_kernel void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
%ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
%truncctpop = trunc i64 %ctpop to i32
store i32 %truncctpop, i32 addrspace(1)* %out, align 4
@ -31,7 +31,7 @@ define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
; VI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
define amdgpu_kernel void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
%val = load i64, i64 addrspace(1)* %in, align 8
%ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
%truncctpop = trunc i64 %ctpop to i32
@ -48,7 +48,7 @@ define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noali
; GCN-DAG: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}
; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
; GCN: s_endpgm
define void @v_ctpop_i64_user(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %s.val) nounwind {
define amdgpu_kernel void @v_ctpop_i64_user(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %s.val) nounwind {
%val = load i64, i64 addrspace(1)* %in, align 8
%ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
%or = or i64 %ctpop, %s.val
@ -60,7 +60,7 @@ define void @v_ctpop_i64_user(i64 addrspace(1)* noalias %out, i64 addrspace(1)*
; GCN: s_bcnt1_i32_b64
; GCN: s_bcnt1_i32_b64
; GCN: s_endpgm
define void @s_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> %val) nounwind {
define amdgpu_kernel void @s_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> %val) nounwind {
%ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone
%truncctpop = trunc <2 x i64> %ctpop to <2 x i32>
store <2 x i32> %truncctpop, <2 x i32> addrspace(1)* %out, align 8
@ -73,7 +73,7 @@ define void @s_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> %val)
; GCN: s_bcnt1_i32_b64
; GCN: s_bcnt1_i32_b64
; GCN: s_endpgm
define void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> %val) nounwind {
define amdgpu_kernel void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> %val) nounwind {
%ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone
%truncctpop = trunc <4 x i64> %ctpop to <4 x i32>
store <4 x i32> %truncctpop, <4 x i32> addrspace(1)* %out, align 16
@ -86,7 +86,7 @@ define void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> %val)
; GCN: v_bcnt_u32_b32
; GCN: v_bcnt_u32_b32
; GCN: s_endpgm
define void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in) nounwind {
define amdgpu_kernel void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in) nounwind {
%val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16
%ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone
%truncctpop = trunc <2 x i64> %ctpop to <2 x i32>
@ -104,7 +104,7 @@ define void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrs
; GCN: v_bcnt_u32_b32
; GCN: v_bcnt_u32_b32
; GCN: s_endpgm
define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrspace(1)* noalias %in) nounwind {
define amdgpu_kernel void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrspace(1)* noalias %in) nounwind {
%val = load <4 x i64>, <4 x i64> addrspace(1)* %in, align 32
%ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone
%truncctpop = trunc <4 x i64> %ctpop to <4 x i32>
@ -121,7 +121,7 @@ define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrs
; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], [[ZERO]]
; GCN: buffer_store_dwordx2 {{v\[}}[[VLO]]:[[VHI]]{{\]}}
; GCN: s_endpgm
define void @ctpop_i64_in_br(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %ctpop_arg, i32 %cond) {
define amdgpu_kernel void @ctpop_i64_in_br(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %ctpop_arg, i32 %cond) {
entry:
%tmp0 = icmp eq i32 %cond, 0
br i1 %tmp0, label %if, label %else
@ -146,7 +146,7 @@ endif:
; GCN: s_bcnt1_i32_b64 [[SRESULT1:s[0-9]+]],
; GCN: s_add_i32 s{{[0-9]+}}, [[SRESULT1]], [[SRESULT0]]
; GCN: s_endpgm
define void @s_ctpop_i128(i32 addrspace(1)* noalias %out, i128 %val) nounwind {
define amdgpu_kernel void @s_ctpop_i128(i32 addrspace(1)* noalias %out, i128 %val) nounwind {
%ctpop = call i128 @llvm.ctpop.i128(i128 %val) nounwind readnone
%truncctpop = trunc i128 %ctpop to i32
store i32 %truncctpop, i32 addrspace(1)* %out, align 4
@ -159,7 +159,7 @@ define void @s_ctpop_i128(i32 addrspace(1)* noalias %out, i128 %val) nounwind {
; GCN: s_bcnt1_i32_b64 [[REG1:s[0-9]+]],
; GCN: s_add_i32 {{s[0-9]+}}, [[REG0]], [[REG1]]
; GCN: s_endpgm
define void @s_ctpop_i65(i32 addrspace(1)* noalias %out, i65 %val) nounwind {
define amdgpu_kernel void @s_ctpop_i65(i32 addrspace(1)* noalias %out, i65 %val) nounwind {
%ctpop = call i65 @llvm.ctpop.i65(i65 %val) nounwind readnone
%truncctpop = trunc i65 %ctpop to i32
store i32 %truncctpop, i32 addrspace(1)* %out, align 4
@ -181,7 +181,7 @@ define void @s_ctpop_i65(i32 addrspace(1)* noalias %out, i65 %val) nounwind {
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define void @v_ctpop_i128(i32 addrspace(1)* noalias %out, i128 addrspace(1)* noalias %in) nounwind {
define amdgpu_kernel void @v_ctpop_i128(i32 addrspace(1)* noalias %out, i128 addrspace(1)* noalias %in) nounwind {
%val = load i128, i128 addrspace(1)* %in, align 8
%ctpop = call i128 @llvm.ctpop.i128(i128 %val) nounwind readnone
%truncctpop = trunc i128 %ctpop to i32

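For 64-bit counts the tests above expect a single s_bcnt1_i32_b64 when the input is uniform, and a chain of two v_bcnt_u32_b32 when it is loaded, the second bcnt accumulating onto the first's result. Because a 64-bit population count never exceeds 64, the tests truncate to i32 before storing; a minimal uniform-input sketch, not from the patch:

declare i64 @llvm.ctpop.i64(i64) nounwind readnone

define amdgpu_kernel void @ctpop_i64_sketch(i32 addrspace(1)* %out, i64 %val) nounwind {
  %pop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone ; -> s_bcnt1_i32_b64
  %trunc = trunc i64 %pop to i32                              ; count fits in 32 bits
  store i32 %trunc, i32 addrspace(1)* %out, align 4
  ret void
}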

@ -14,7 +14,7 @@ declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1) nounwind readnone
; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: FFBL_INT {{\*? *}}[[RESULT]]
define void @s_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
define amdgpu_kernel void @s_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
%cttz = call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone
store i32 %cttz, i32 addrspace(1)* %out, align 4
ret void
@ -27,7 +27,7 @@ define void @s_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nou
; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: FFBL_INT {{\*? *}}[[RESULT]]
define void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
define amdgpu_kernel void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32, i32 addrspace(1)* %valptr, align 4
%cttz = call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone
store i32 %cttz, i32 addrspace(1)* %out, align 4
@ -43,7 +43,7 @@ define void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
; EG: FFBL_INT {{\*? *}}[[RESULT]]
; EG: FFBL_INT {{\*? *}}[[RESULT]]
define void @v_cttz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
define amdgpu_kernel void @v_cttz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
%val = load <2 x i32>, <2 x i32> addrspace(1)* %valptr, align 8
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %val, i1 true) nounwind readnone
store <2 x i32> %cttz, <2 x i32> addrspace(1)* %out, align 8
@ -63,7 +63,7 @@ define void @v_cttz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x
; EG: FFBL_INT {{\*? *}}[[RESULT]]
; EG: FFBL_INT {{\*? *}}[[RESULT]]
; EG: FFBL_INT {{\*? *}}[[RESULT]]
define void @v_cttz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
define amdgpu_kernel void @v_cttz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
%val = load <4 x i32>, <4 x i32> addrspace(1)* %valptr, align 16
%cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %val, i1 true) nounwind readnone
store <4 x i32> %cttz, <4 x i32> addrspace(1)* %out, align 16

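The i1 true operand of llvm.cttz asserts that a zero input gives an undefined result, which is what lets these tests expect the bare find-first-bit instruction (FFBL_INT on EG, and the scalar equivalent on GCN) with no zero guard around it. Restating the first test with that flag spelled out in comments:

declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone

define amdgpu_kernel void @cttz_flag_sketch(i32 addrspace(1)* %out, i32 %val) nounwind {
  ; i1 true: a zero input is undefined, so no select or guard is needed
  ; around the find-first-bit instruction.
  %cttz = call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone
  store i32 %cttz, i32 addrspace(1)* %out, align 4
  ret void
}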

@ -12,7 +12,7 @@ declare float @llvm.amdgcn.cubema(float, float, float) #0
; GCN-DAG: v_cubetc_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN-DAG: v_cubema_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: _store_dwordx4
define void @cube(<4 x float> addrspace(1)* %out, float %a, float %b, float %c) #1 {
define amdgpu_kernel void @cube(<4 x float> addrspace(1)* %out, float %a, float %b, float %c) #1 {
%cubeid = call float @llvm.amdgcn.cubeid(float %a, float %b, float %c)
%cubesc = call float @llvm.amdgcn.cubesc(float %a, float %b, float %c)
%cubetc = call float @llvm.amdgcn.cubetc(float %a, float %b, float %c)

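The hunk above is cut off before the cubema call and the store. As an illustration only, and not the test's actual continuation, the four cube intrinsics are naturally gathered into a <4 x float> so that a single dwordx4 store can be checked:

declare float @llvm.amdgcn.cubeid(float, float, float) nounwind readnone
declare float @llvm.amdgcn.cubesc(float, float, float) nounwind readnone
declare float @llvm.amdgcn.cubetc(float, float, float) nounwind readnone
declare float @llvm.amdgcn.cubema(float, float, float) nounwind readnone

define amdgpu_kernel void @cube_sketch(<4 x float> addrspace(1)* %out, float %a, float %b, float %c) {
  %id = call float @llvm.amdgcn.cubeid(float %a, float %b, float %c)
  %sc = call float @llvm.amdgcn.cubesc(float %a, float %b, float %c)
  %tc = call float @llvm.amdgcn.cubetc(float %a, float %b, float %c)
  %ma = call float @llvm.amdgcn.cubema(float %a, float %b, float %c)
  %v0 = insertelement <4 x float> undef, float %id, i32 0
  %v1 = insertelement <4 x float> %v0, float %sc, i32 1
  %v2 = insertelement <4 x float> %v1, float %tc, i32 2
  %v3 = insertelement <4 x float> %v2, float %ma, i32 3
  store <4 x float> %v3, <4 x float> addrspace(1)* %out
  ret void
}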

@ -10,7 +10,7 @@ declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
; GCN-NOT: lshr
; GCN: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[LOADREG]]
; GCN: buffer_store_dword [[CONV]],
define void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
define amdgpu_kernel void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
%load = load i8, i8 addrspace(1)* %in, align 1
%cvt = uitofp i8 %load to float
store float %cvt, float addrspace(1)* %out, align 4
@ -22,7 +22,7 @@ define void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* n
; GCN-DAG: v_cvt_f32_ubyte1_e32 v[[HIRESULT:[0-9]+]], [[LD]]
; GCN-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LD]]
; GCN: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> addrspace(1)* noalias %in) nounwind {
define amdgpu_kernel void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <2 x i8>, <2 x i8> addrspace(1)* %in, align 2
%cvt = uitofp <2 x i8> %load to <2 x float>
store <2 x float> %cvt, <2 x float> addrspace(1)* %out, align 16
@ -36,7 +36,7 @@ define void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8>
; GCN-DAG: v_cvt_f32_ubyte1_e32 v[[HIRESULT:[0-9]+]], [[VAL]]
; GCN-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[VAL]]
; GCN: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> addrspace(1)* noalias %in) nounwind {
define amdgpu_kernel void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <3 x i8>, <3 x i8> addrspace(1)* %in, align 4
%cvt = uitofp <3 x i8> %load to <3 x float>
store <3 x float> %cvt, <3 x float> addrspace(1)* %out, align 16
@ -52,7 +52,7 @@ define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8>
; GCN-DAG: v_cvt_f32_ubyte1_e32 v{{[0-9]+}}, [[LOADREG]]
; GCN-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
; GCN: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
define amdgpu_kernel void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
%cvt = uitofp <4 x i8> %load to <4 x float>
store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
@ -76,7 +76,7 @@ define void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8>
; GCN-DAG: v_cvt_f32_ubyte0_e32 v[[HIRESULT:[0-9]+]]
; GCN: buffer_store_dwordx4
define void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 1
%cvt = uitofp <4 x i8> %load to <4 x float>
store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
@ -110,7 +110,7 @@ define void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out
; GCN: {{buffer|flat}}_store_dword
; GCN: s_endpgm
define void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %out2, <4 x i8> addrspace(1)* noalias %in) nounwind {
define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %out2, <4 x i8> addrspace(1)* noalias %in) nounwind {
%tid.x = call i32 @llvm.amdgcn.workitem.id.x()
%in.ptr = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid.x
%load = load <4 x i8>, <4 x i8> addrspace(1)* %in.ptr, align 4
@ -124,7 +124,7 @@ define void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <
; Make sure this doesn't crash.
; GCN-LABEL: {{^}}load_v7i8_to_v7f32:
; GCN: s_endpgm
define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> addrspace(1)* noalias %in) nounwind {
define amdgpu_kernel void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <7 x i8>, <7 x i8> addrspace(1)* %in, align 1
%cvt = uitofp <7 x i8> %load to <7 x float>
store <7 x float> %cvt, <7 x float> addrspace(1)* %out, align 16
@ -147,7 +147,7 @@ define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8>
; GCN-NOT: lshr
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind {
define amdgpu_kernel void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <8 x i8>, <8 x i8> addrspace(1)* %in, align 8
%cvt = uitofp <8 x i8> %load to <8 x float>
store <8 x float> %cvt, <8 x float> addrspace(1)* %out, align 16
@ -159,7 +159,7 @@ define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8>
; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 2, [[LOADREG]]
; GCN-NEXT: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[ADD]]
; GCN: buffer_store_dword [[CONV]],
define void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%load = load i32, i32 addrspace(1)* %in, align 4
%add = add i32 %load, 2
%inreg = and i32 %add, 255
@ -169,7 +169,7 @@ define void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addr
}
; GCN-LABEL: {{^}}i8_zext_inreg_hi1_to_f32:
define void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
define amdgpu_kernel void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%load = load i32, i32 addrspace(1)* %in, align 4
%inreg = and i32 %load, 65280
%shr = lshr i32 %inreg, 8
@ -181,7 +181,7 @@ define void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias %out, i32 addr
; We don't get these ones because of the zext, but instcombine removes
; them so it shouldn't really matter.
; GCN-LABEL: {{^}}i8_zext_i32_to_f32:
define void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
define amdgpu_kernel void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
%load = load i8, i8 addrspace(1)* %in, align 1
%ext = zext i8 %load to i32
%cvt = uitofp i32 %ext to float
@ -190,7 +190,7 @@ define void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1
}
; GCN-LABEL: {{^}}v4i8_zext_v4i32_to_v4f32:
define void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 1
%ext = zext <4 x i8> %load to <4 x i32>
%cvt = uitofp <4 x i32> %ext to <4 x float>
@ -203,7 +203,7 @@ define void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4
; GCN-NOT: [[VAL]]
; GCN: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[CONV]]
define void @extract_byte0_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
define amdgpu_kernel void @extract_byte0_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%val = load i32, i32 addrspace(1)* %in
%and = and i32 %val, 255
%cvt = uitofp i32 %and to float
@ -216,7 +216,7 @@ define void @extract_byte0_to_f32(float addrspace(1)* noalias %out, i32 addrspac
; GCN-NOT: [[VAL]]
; GCN: v_cvt_f32_ubyte1_e32 [[CONV:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[CONV]]
define void @extract_byte1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
define amdgpu_kernel void @extract_byte1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%val = load i32, i32 addrspace(1)* %in
%srl = lshr i32 %val, 8
%and = and i32 %srl, 255
@ -230,7 +230,7 @@ define void @extract_byte1_to_f32(float addrspace(1)* noalias %out, i32 addrspac
; GCN-NOT: [[VAL]]
; GCN: v_cvt_f32_ubyte2_e32 [[CONV:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[CONV]]
define void @extract_byte2_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
define amdgpu_kernel void @extract_byte2_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%val = load i32, i32 addrspace(1)* %in
%srl = lshr i32 %val, 16
%and = and i32 %srl, 255
@ -244,7 +244,7 @@ define void @extract_byte2_to_f32(float addrspace(1)* noalias %out, i32 addrspac
; GCN-NOT: [[VAL]]
; GCN: v_cvt_f32_ubyte3_e32 [[CONV:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[CONV]]
define void @extract_byte3_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
define amdgpu_kernel void @extract_byte3_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%val = load i32, i32 addrspace(1)* %in
%srl = lshr i32 %val, 24
%and = and i32 %srl, 255

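The pattern targeted throughout this file is uitofp of one byte selected with lshr and a 255 mask, which maps onto v_cvt_f32_ubyteN_e32 where N is the byte index; as the comment in the file notes, an explicit zext of an i8 load hides the pattern, but instcombine normally folds the zext away before it matters. The byte-1 shape, mirroring extract_byte1_to_f32 with each step annotated:

define amdgpu_kernel void @byte1_sketch(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
  %val = load i32, i32 addrspace(1)* %in
  %srl = lshr i32 %val, 8          ; shift byte 1 down to bit 0
  %and = and i32 %srl, 255         ; isolate that byte
  %cvt = uitofp i32 %and to float  ; whole chain -> v_cvt_f32_ubyte1_e32
  store float %cvt, float addrspace(1)* %out, align 4
  ret void
}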

@ -10,7 +10,7 @@ declare float @llvm.floor.f32(float) #1
; SI-NOT: add
; SI-NONAN: v_cvt_flr_i32_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; SI: s_endpgm
define void @cvt_flr_i32_f32_0(i32 addrspace(1)* %out, float %x) #0 {
define amdgpu_kernel void @cvt_flr_i32_f32_0(i32 addrspace(1)* %out, float %x) #0 {
%floor = call float @llvm.floor.f32(float %x) #1
%cvt = fptosi float %floor to i32
store i32 %cvt, i32 addrspace(1)* %out
@ -22,7 +22,7 @@ define void @cvt_flr_i32_f32_0(i32 addrspace(1)* %out, float %x) #0 {
; SI-SAFE-NOT: v_cvt_flr_i32_f32
; SI-NONAN: v_cvt_flr_i32_f32_e32 v{{[0-9]+}}, [[TMP]]
; SI: s_endpgm
define void @cvt_flr_i32_f32_1(i32 addrspace(1)* %out, float %x) #0 {
define amdgpu_kernel void @cvt_flr_i32_f32_1(i32 addrspace(1)* %out, float %x) #0 {
%fadd = fadd float %x, 1.0
%floor = call float @llvm.floor.f32(float %fadd) #1
%cvt = fptosi float %floor to i32
@ -35,7 +35,7 @@ define void @cvt_flr_i32_f32_1(i32 addrspace(1)* %out, float %x) #0 {
; SI-SAFE-NOT: v_cvt_flr_i32_f32
; SI-NONAN: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|
; SI: s_endpgm
define void @cvt_flr_i32_f32_fabs(i32 addrspace(1)* %out, float %x) #0 {
define amdgpu_kernel void @cvt_flr_i32_f32_fabs(i32 addrspace(1)* %out, float %x) #0 {
%x.fabs = call float @llvm.fabs.f32(float %x) #1
%floor = call float @llvm.floor.f32(float %x.fabs) #1
%cvt = fptosi float %floor to i32
@ -48,7 +48,7 @@ define void @cvt_flr_i32_f32_fabs(i32 addrspace(1)* %out, float %x) #0 {
; SI-SAFE-NOT: v_cvt_flr_i32_f32
; SI-NONAN: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}
; SI: s_endpgm
define void @cvt_flr_i32_f32_fneg(i32 addrspace(1)* %out, float %x) #0 {
define amdgpu_kernel void @cvt_flr_i32_f32_fneg(i32 addrspace(1)* %out, float %x) #0 {
%x.fneg = fsub float -0.000000e+00, %x
%floor = call float @llvm.floor.f32(float %x.fneg) #1
%cvt = fptosi float %floor to i32
@ -61,7 +61,7 @@ define void @cvt_flr_i32_f32_fneg(i32 addrspace(1)* %out, float %x) #0 {
; SI-SAFE-NOT: v_cvt_flr_i32_f32
; SI-NONAN: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, -|s{{[0-9]+}}|
; SI: s_endpgm
define void @cvt_flr_i32_f32_fabs_fneg(i32 addrspace(1)* %out, float %x) #0 {
define amdgpu_kernel void @cvt_flr_i32_f32_fabs_fneg(i32 addrspace(1)* %out, float %x) #0 {
%x.fabs = call float @llvm.fabs.f32(float %x) #1
%x.fabs.fneg = fsub float -0.000000e+00, %x.fabs
%floor = call float @llvm.floor.f32(float %x.fabs.fneg) #1
@ -75,7 +75,7 @@ define void @cvt_flr_i32_f32_fabs_fneg(i32 addrspace(1)* %out, float %x) #0 {
; SI: v_floor_f32
; SI: v_cvt_u32_f32_e32
; SI: s_endpgm
define void @no_cvt_flr_i32_f32_0(i32 addrspace(1)* %out, float %x) #0 {
define amdgpu_kernel void @no_cvt_flr_i32_f32_0(i32 addrspace(1)* %out, float %x) #0 {
%floor = call float @llvm.floor.f32(float %x) #1
%cvt = fptoui float %floor to i32
store i32 %cvt, i32 addrspace(1)* %out

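fptosi of llvm.floor.f32 can fuse into the single v_cvt_flr_i32_f32, but only in the no-NaN run lines (SI-NONAN); the safe run keeps the separate floor and convert, and the fptoui variant (no_cvt_flr_i32_f32_0) never fuses because the instruction produces a signed result. A minimal sketch of the fusable shape:

declare float @llvm.floor.f32(float) nounwind readnone

define amdgpu_kernel void @flr_sketch(i32 addrspace(1)* %out, float %x) {
  %floor = call float @llvm.floor.f32(float %x)
  %cvt = fptosi float %floor to i32   ; -> v_cvt_flr_i32_f32 under SI-NONAN
  store i32 %cvt, i32 addrspace(1)* %out
  ret void
}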

@ -9,7 +9,7 @@ declare float @llvm.floor.f32(float) #1
; SI-SAFE-NOT: v_cvt_rpi_i32_f32
; SI-NONAN: v_cvt_rpi_i32_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; SI: s_endpgm
define void @cvt_rpi_i32_f32(i32 addrspace(1)* %out, float %x) #0 {
define amdgpu_kernel void @cvt_rpi_i32_f32(i32 addrspace(1)* %out, float %x) #0 {
%fadd = fadd float %x, 0.5
%floor = call float @llvm.floor.f32(float %fadd) #1
%cvt = fptosi float %floor to i32
@ -21,7 +21,7 @@ define void @cvt_rpi_i32_f32(i32 addrspace(1)* %out, float %x) #0 {
; SI-SAFE-NOT: v_cvt_rpi_i32_f32
; SI-NONAN: v_cvt_rpi_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|{{$}}
; SI: s_endpgm
define void @cvt_rpi_i32_f32_fabs(i32 addrspace(1)* %out, float %x) #0 {
define amdgpu_kernel void @cvt_rpi_i32_f32_fabs(i32 addrspace(1)* %out, float %x) #0 {
%x.fabs = call float @llvm.fabs.f32(float %x) #1
%fadd = fadd float %x.fabs, 0.5
%floor = call float @llvm.floor.f32(float %fadd) #1
@ -37,7 +37,7 @@ define void @cvt_rpi_i32_f32_fabs(i32 addrspace(1)* %out, float %x) #0 {
; SI-SAFE-NOT: v_cvt_flr_i32_f32
; SI-NONAN: v_cvt_flr_i32_f32_e32 {{v[0-9]+}}, [[TMP]]
; SI: s_endpgm
define void @cvt_rpi_i32_f32_fneg(i32 addrspace(1)* %out, float %x) #0 {
define amdgpu_kernel void @cvt_rpi_i32_f32_fneg(i32 addrspace(1)* %out, float %x) #0 {
%x.fneg = fsub float -0.000000e+00, %x
%fadd = fadd float %x.fneg, 0.5
%floor = call float @llvm.floor.f32(float %fadd) #1
@ -55,7 +55,7 @@ define void @cvt_rpi_i32_f32_fneg(i32 addrspace(1)* %out, float %x) #0 {
; SI-SAFE-NOT: v_cvt_flr_i32_f32
; SI-NONAN: v_cvt_flr_i32_f32_e32 {{v[0-9]+}}, [[TMP]]
; SI: s_endpgm
define void @cvt_rpi_i32_f32_fabs_fneg(i32 addrspace(1)* %out, float %x) #0 {
define amdgpu_kernel void @cvt_rpi_i32_f32_fabs_fneg(i32 addrspace(1)* %out, float %x) #0 {
%x.fabs = call float @llvm.fabs.f32(float %x) #1
%x.fabs.fneg = fsub float -0.000000e+00, %x.fabs
%fadd = fadd float %x.fabs.fneg, 0.5
@ -71,7 +71,7 @@ define void @cvt_rpi_i32_f32_fabs_fneg(i32 addrspace(1)* %out, float %x) #0 {
; SI: v_floor_f32
; SI: v_cvt_u32_f32
; SI: s_endpgm
define void @no_cvt_rpi_i32_f32_0(i32 addrspace(1)* %out, float %x) #0 {
define amdgpu_kernel void @no_cvt_rpi_i32_f32_0(i32 addrspace(1)* %out, float %x) #0 {
%fadd = fadd float %x, 0.5
%floor = call float @llvm.floor.f32(float %fadd) #1
%cvt = fptoui float %floor to i32

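v_cvt_rpi_i32_f32 evaluates floor(x + 0.5) and converts in one instruction, so the matchable IR is an fadd of exactly 0.5 feeding floor and then fptosi. The tests above show that fneg on the input defeats the rpi match and falls back to the flr form, and that fptoui never matches. The canonical shape, sketched:

declare float @llvm.floor.f32(float) nounwind readnone

define amdgpu_kernel void @rpi_sketch(i32 addrspace(1)* %out, float %x) {
  %fadd = fadd float %x, 0.5
  %floor = call float @llvm.floor.f32(float %fadd)
  %cvt = fptosi float %floor to i32   ; -> v_cvt_rpi_i32_f32 under SI-NONAN
  store i32 %cvt, i32 addrspace(1)* %out
  ret void
}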

@ -9,7 +9,7 @@
; CHECK: buffer_store_dword v{{[0-9]+}}, [[VADDR]], [[SADDR]]
; CHECK: buffer_store_dword v{{[0-9]+}}, [[VADDR]], [[SADDR]]
define void @store_same_base_ptr(i32 addrspace(1)* %out) {
define amdgpu_kernel void @store_same_base_ptr(i32 addrspace(1)* %out) {
entry:
%id = call i32 @llvm.amdgcn.workitem.id.x() #0
%offset = sext i32 %id to i64


@ -10,7 +10,7 @@
; CHECK: {{^}}sint:
; CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @sint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) {
define amdgpu_kernel void @sint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
%sint = load i32, i32 addrspace(1) * %in
@ -24,7 +24,7 @@ entry:
;CHECK: {{^}}uint:
;CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @uint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) {
define amdgpu_kernel void @uint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
%uint = load i32, i32 addrspace(1) * %in


@ -4,7 +4,7 @@
; Test for a crash in the custom assembly dump code.
; SI: s_endpgm
define void @test(i32 addrspace(1)* %out) {
define amdgpu_kernel void @test(i32 addrspace(1)* %out) {
store i32 0, i32 addrspace(1)* %out
ret void
}

Some files were not shown because too many files have changed in this diff.