
[AMDGPU] Change constant addr space to 4

Differential Revision: https://reviews.llvm.org/D43170

llvm-svn: 325030
Author: Yaxun Liu
Date:   2018-02-13 18:00:25 +0000
Parent: 3f5aaeaf08
Commit: c6e831c09d
90 changed files with 1240 additions and 1249 deletions
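
In IR terms the change is mechanical: constant pointers move from addrspace(2) to addrspace(4), and region moves the other way, but it touches every test that spells out an address space. A minimal before/after sketch (function and argument names are illustrative, distilled from the test updates below):

; Before this commit: constant memory was addrspace(2)
define amdgpu_kernel void @read_const(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
  %v = load i32, i32 addrspace(2)* %in
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

; After this commit: the same kernel written against addrspace(4)
define amdgpu_kernel void @read_const(i32 addrspace(1)* %out, i32 addrspace(4)* %in) {
  %v = load i32, i32 addrspace(4)* %in
  store i32 %v, i32 addrspace(1)* %out
  ret void
}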


@ -270,27 +270,17 @@ LLVM Address Space number is used throughout LLVM (for example, in LLVM IR).
.. table:: Address Space Mapping
:name: amdgpu-address-space-mapping-table
================== ================= =================
LLVM Address Space Memory Space
------------------ -----------------------------------
\                  Current Default   Future Default
================== ================= =================
0                  Generic (Flat)    Generic (Flat)
1                  Global            Global
2                  Constant          Region (GDS)
3                  Local (group/LDS) Local (group/LDS)
4                  Region (GDS)      Constant
5                  Private (Scratch) Private (Scratch)
6                  Constant 32-bit   Constant 32-bit
================== ================= =================

Current Default
  This is the current default address space mapping used for all languages.
  This will shortly be deprecated.

Future Default
  This will shortly be the only address space mapping for all languages using
  AMDGPU backend.

================== =================
LLVM Address Space Memory Space
================== =================
0                  Generic (Flat)
1                  Global
2                  Region (GDS)
3                  Local (group/LDS)
4                  Constant
5                  Private (Scratch)
6                  Constant 32-bit
================== =================

.. _amdgpu-memory-scopes:


@ -83,22 +83,22 @@ defm int_amdgcn_workgroup_id : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
def int_amdgcn_dispatch_ptr :
GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">,
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
[IntrNoMem, IntrSpeculatable]>;
def int_amdgcn_queue_ptr :
GCCBuiltin<"__builtin_amdgcn_queue_ptr">,
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
[IntrNoMem, IntrSpeculatable]>;
def int_amdgcn_kernarg_segment_ptr :
GCCBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">,
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
[IntrNoMem, IntrSpeculatable]>;
def int_amdgcn_implicitarg_ptr :
GCCBuiltin<"__builtin_amdgcn_implicitarg_ptr">,
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
[IntrNoMem, IntrSpeculatable]>;
def int_amdgcn_groupstaticsize :
@ -111,7 +111,7 @@ def int_amdgcn_dispatch_id :
def int_amdgcn_implicit_buffer_ptr :
GCCBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">,
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
[IntrNoMem, IntrSpeculatable]>;
// Set EXEC to the 64-bit value given.
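
IR that consumes these intrinsics now declares them with an addrspace(4) result; a small usage sketch mirroring the test updates later in this commit (kernel name and flow are illustrative):

declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()

define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %out) {
  %p  = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
  %bc = bitcast i8 addrspace(4)* %p to i32 addrspace(4)*
  %v  = load i32, i32 addrspace(4)* %bc   ; constant-space load, selects to a scalar load
  store i32 %v, i32 addrspace(1)* %out
  ret void
}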


@ -222,7 +222,7 @@ struct AMDGPUAS {
MAX_COMMON_ADDRESS = 5,
GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
CONSTANT_ADDRESS = 2, ///< Address space for constant memory (VTX2)
CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2)
LOCAL_ADDRESS = 3, ///< Address space for local memory.
CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory


@ -61,7 +61,7 @@ AMDGPUAAResult::ASAliasRulesTy::ASAliasRulesTy(AMDGPUAS AS_, Triple::ArchType Ar
/* Region */ {NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, MayAlias}
};
static const AliasResult ASAliasRulesGenIsZero[6][6] = {
/* Flat Global Constant Group Region Private */
/* Flat Global Region Group Constant Private */
/* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
/* Global */ {MayAlias, MayAlias, NoAlias , NoAlias , NoAlias , NoAlias},
/* Constant */ {MayAlias, NoAlias , MayAlias, NoAlias , NoAlias, NoAlias},
@ -72,9 +72,9 @@ AMDGPUAAResult::ASAliasRulesTy::ASAliasRulesTy(AMDGPUAS AS_, Triple::ArchType Ar
assert(AS.MAX_COMMON_ADDRESS <= 5);
if (AS.FLAT_ADDRESS == 0) {
assert(AS.GLOBAL_ADDRESS == 1 &&
AS.REGION_ADDRESS == 4 &&
AS.REGION_ADDRESS == 2 &&
AS.LOCAL_ADDRESS == 3 &&
AS.CONSTANT_ADDRESS == 2 &&
AS.CONSTANT_ADDRESS == 4 &&
AS.PRIVATE_ADDRESS == 5);
ASAliasRules = &ASAliasRulesGenIsZero;
} else {
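
The matrix is indexed by address-space number, which is why the column labels and the assert are what change in this hunk while the numeric layout stays put. The table still answers, for instance, that constant and group (LDS) memory never alias; a hypothetical IR function this licenses (names are illustrative):

define void @no_alias_demo(i32 addrspace(4)* %c, i32 addrspace(3)* %l) {
  ; constant (4) and group/LDS (3) never alias per the table,
  ; so the load below may be scheduled across the store
  %v = load i32, i32 addrspace(4)* %c
  store i32 %v, i32 addrspace(3)* %l
  ret void
}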


@ -116,7 +116,7 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
if (Info->hasKernargSegmentPtr()) {
unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
const LLT P2 = LLT::pointer(2, 64);
const LLT P2 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
unsigned VReg = MRI.createGenericVirtualRegister(P2);
MRI.addLiveIn(InputPtrReg, VReg);
MIRBuilder.getMBB().addLiveIn(InputPtrReg);


@ -12,6 +12,7 @@
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "AMDGPULegalizerInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
@ -29,8 +30,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo() {
const LLT V2S16 = LLT::vector(2, 16);
const LLT S32 = LLT::scalar(32);
const LLT S64 = LLT::scalar(64);
const LLT P1 = LLT::pointer(1, 64);
const LLT P2 = LLT::pointer(2, 64);
const LLT P1 = LLT::pointer(AMDGPUAS::GLOBAL_ADDRESS, 64);
const LLT P2 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
setAction({G_ADD, S32}, Legal);
setAction({G_AND, S32}, Legal);


@ -266,7 +266,7 @@ static StringRef computeDataLayout(const Triple &TT) {
// 32-bit private, local, and region pointers. 64-bit global, constant and
// flat.
return "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-p6:32:32"
return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
"-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5";
}
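
A module built for the new mapping carries this layout verbatim, with p2 (region) now a 32-bit pointer and p4 (constant) a 64-bit one; a minimal module header as a sketch (the triple is chosen for illustration):

; region pointers shrink to 32 bits, constant pointers stay 64-bit but move to p4
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
target triple = "amdgcn-amd-amdhsa"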


@ -929,7 +929,7 @@ AMDGPUAS getAMDGPUAS(Triple T) {
AMDGPUAS AS;
AS.FLAT_ADDRESS = 0;
AS.PRIVATE_ADDRESS = 5;
AS.REGION_ADDRESS = 4;
AS.REGION_ADDRESS = 2;
return AS;
}


@ -5,7 +5,7 @@
# REQUIRES: global-isel
--- |
define amdgpu_kernel void @smrd_imm(i32 addrspace(2)* %const0) { ret void }
define amdgpu_kernel void @smrd_imm(i32 addrspace(4)* %const0) { ret void }
...
---
@ -91,50 +91,50 @@ body: |
bb.0:
liveins: $sgpr0_sgpr1
%0:sgpr(p2) = COPY $sgpr0_sgpr1
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 4
%2:sgpr(p2) = G_GEP %0, %1
%2:sgpr(p4) = G_GEP %0, %1
%3:sgpr(s32) = G_LOAD %2 :: (load 4 from %ir.const0)
$sgpr0 = COPY %3
%4:sgpr(s64) = G_CONSTANT i64 1020
%5:sgpr(p2) = G_GEP %0, %4
%5:sgpr(p4) = G_GEP %0, %4
%6:sgpr(s32) = G_LOAD %5 :: (load 4 from %ir.const0)
$sgpr0 = COPY %6
%7:sgpr(s64) = G_CONSTANT i64 1024
%8:sgpr(p2) = G_GEP %0, %7
%8:sgpr(p4) = G_GEP %0, %7
%9:sgpr(s32) = G_LOAD %8 :: (load 4 from %ir.const0)
$sgpr0 = COPY %9
%10:sgpr(s64) = G_CONSTANT i64 1048572
%11:sgpr(p2) = G_GEP %0, %10
%11:sgpr(p4) = G_GEP %0, %10
%12:sgpr(s32) = G_LOAD %11 :: (load 4 from %ir.const0)
$sgpr0 = COPY %12
%13:sgpr(s64) = G_CONSTANT i64 1048576
%14:sgpr(p2) = G_GEP %0, %13
%14:sgpr(p4) = G_GEP %0, %13
%15:sgpr(s32) = G_LOAD %14 :: (load 4 from %ir.const0)
$sgpr0 = COPY %15
%16:sgpr(s64) = G_CONSTANT i64 17179869180
%17:sgpr(p2) = G_GEP %0, %16
%17:sgpr(p4) = G_GEP %0, %16
%18:sgpr(s32) = G_LOAD %17 :: (load 4 from %ir.const0)
$sgpr0 = COPY %18
%19:sgpr(s64) = G_CONSTANT i64 17179869184
%20:sgpr(p2) = G_GEP %0, %19
%20:sgpr(p4) = G_GEP %0, %19
%21:sgpr(s32) = G_LOAD %20 :: (load 4 from %ir.const0)
$sgpr0 = COPY %21
%22:sgpr(s64) = G_CONSTANT i64 4294967292
%23:sgpr(p2) = G_GEP %0, %22
%23:sgpr(p4) = G_GEP %0, %22
%24:sgpr(s32) = G_LOAD %23 :: (load 4 from %ir.const0)
$sgpr0 = COPY %24
%25:sgpr(s64) = G_CONSTANT i64 4294967296
%26:sgpr(p2) = G_GEP %0, %25
%26:sgpr(p4) = G_GEP %0, %25
%27:sgpr(s32) = G_LOAD %26 :: (load 4 from %ir.const0)
$sgpr0 = COPY %27


@ -18,28 +18,28 @@ define amdgpu_vs void @test_f32(float %arg0) {
}
; CHECK-LABEL: name: test_ptr2_byval
; CHECK: [[S01:%[0-9]+]]:_(p2) = COPY $sgpr0_sgpr1
; CHECK: [[S01:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
; CHECK: G_LOAD [[S01]]
define amdgpu_vs void @test_ptr2_byval(i32 addrspace(2)* byval %arg0) {
%tmp0 = load volatile i32, i32 addrspace(2)* %arg0
define amdgpu_vs void @test_ptr2_byval(i32 addrspace(4)* byval %arg0) {
%tmp0 = load volatile i32, i32 addrspace(4)* %arg0
ret void
}
; CHECK-LABEL: name: test_ptr2_inreg
; CHECK: [[S01:%[0-9]+]]:_(p2) = COPY $sgpr0_sgpr1
; CHECK: [[S01:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
; CHECK: G_LOAD [[S01]]
define amdgpu_vs void @test_ptr2_inreg(i32 addrspace(2)* inreg %arg0) {
%tmp0 = load volatile i32, i32 addrspace(2)* %arg0
define amdgpu_vs void @test_ptr2_inreg(i32 addrspace(4)* inreg %arg0) {
%tmp0 = load volatile i32, i32 addrspace(4)* %arg0
ret void
}
; CHECK-LABEL: name: test_sgpr_alignment0
; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY $sgpr0
; CHECK: [[S23:%[0-9]+]]:_(p2) = COPY $sgpr2_sgpr3
; CHECK: [[S23:%[0-9]+]]:_(p4) = COPY $sgpr2_sgpr3
; CHECK: G_LOAD [[S23]]
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[S0]]
define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, i32 addrspace(2)* inreg %arg1) {
%tmp0 = load volatile i32, i32 addrspace(2)* %arg1
define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, i32 addrspace(4)* inreg %arg1) {
%tmp0 = load volatile i32, i32 addrspace(4)* %arg1
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0
ret void
}


@ -3,7 +3,7 @@
# REQUIRES: global-isel
--- |
define amdgpu_kernel void @load_constant(i32 addrspace(2)* %ptr0) { ret void }
define amdgpu_kernel void @load_constant(i32 addrspace(4)* %ptr0) { ret void }
define amdgpu_kernel void @load_global_uniform(i32 addrspace(1)* %ptr1) {
%tmp0 = load i32, i32 addrspace(1)* %ptr1
ret void
@ -30,7 +30,7 @@ legalized: true
body: |
bb.0:
liveins: $sgpr0_sgpr1
%0:_(p2) = COPY $sgpr0_sgpr1
%0:_(p4) = COPY $sgpr0_sgpr1
%1:_(s32) = G_LOAD %0 :: (load 4 from %ir.ptr0)
...


@ -9,10 +9,10 @@
; GCN-LABEL: {{^}}smrd0:
; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4
define amdgpu_kernel void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
define amdgpu_kernel void @smrd0(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) {
entry:
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 1
%1 = load i32, i32 addrspace(2)* %0
%0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 1
%1 = load i32, i32 addrspace(4)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
@ -21,10 +21,10 @@ entry:
; GCN-LABEL: {{^}}smrd1:
; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff,0x{{[0-9]+[137]}}
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
define amdgpu_kernel void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
define amdgpu_kernel void @smrd1(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) {
entry:
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 255
%1 = load i32, i32 addrspace(2)* %0
%0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 255
%1 = load i32, i32 addrspace(4)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
@ -36,10 +36,10 @@ entry:
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
; GCN: s_endpgm
define amdgpu_kernel void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
define amdgpu_kernel void @smrd2(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) {
entry:
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 256
%1 = load i32, i32 addrspace(2)* %0
%0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 256
%1 = load i32, i32 addrspace(4)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
@ -51,10 +51,10 @@ entry:
; XSI: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0xb ; encoding: [0x0b
; TODO: Add VI checks
; XGCN: s_endpgm
define amdgpu_kernel void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
define amdgpu_kernel void @smrd3(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) {
entry:
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32
%1 = load i32, i32 addrspace(2)* %0
%0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 4294967296 ; 2 ^ 32
%1 = load i32, i32 addrspace(4)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
@ -65,10 +65,10 @@ entry:
; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
define amdgpu_kernel void @smrd4(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
define amdgpu_kernel void @smrd4(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) {
entry:
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262143
%1 = load i32, i32 addrspace(2)* %0
%0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 262143
%1 = load i32, i32 addrspace(4)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
@ -79,10 +79,10 @@ entry:
; SIVI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
; GCN: s_endpgm
define amdgpu_kernel void @smrd5(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
define amdgpu_kernel void @smrd5(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) {
entry:
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262144
%1 = load i32, i32 addrspace(2)* %0
%0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 262144
%1 = load i32, i32 addrspace(4)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
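
The CHECK immediates follow from how each subtarget encodes the SMRD offset, SI/CI in dwords and VI in bytes, so the same GEP index appears as different constants; worked from the tests above:

; gep i64 1   -> byte offset 4    -> SICI imm 0x1  (4/4 dwords), VI imm 0x4   (bytes)
; gep i64 255 -> byte offset 1020 -> SICI imm 0xff (1020/4),     VI imm 0x3fc
; gep i64 256 -> byte offset 1024 -> CI   imm 0x100,             VI imm 0x400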


@ -27,9 +27,9 @@ define amdgpu_kernel void @v_test_add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i
; VI: s_add_i32
; VI: s_add_i32
define amdgpu_kernel void @s_test_add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %in0, <2 x i16> addrspace(2)* %in1) #1 {
%a = load <2 x i16>, <2 x i16> addrspace(2)* %in0
%b = load <2 x i16>, <2 x i16> addrspace(2)* %in1
define amdgpu_kernel void @s_test_add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %in0, <2 x i16> addrspace(4)* %in1) #1 {
%a = load <2 x i16>, <2 x i16> addrspace(4)* %in0
%b = load <2 x i16>, <2 x i16> addrspace(4)* %in1
%add = add <2 x i16> %a, %b
store <2 x i16> %add, <2 x i16> addrspace(1)* %out
ret void
@ -41,8 +41,8 @@ define amdgpu_kernel void @s_test_add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i
; VI: s_add_i32
; VI: s_add_i32
define amdgpu_kernel void @s_test_add_self_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %in0) #1 {
%a = load <2 x i16>, <2 x i16> addrspace(2)* %in0
define amdgpu_kernel void @s_test_add_self_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %in0) #1 {
%a = load <2 x i16>, <2 x i16> addrspace(4)* %in0
%add = add <2 x i16> %a, %a
store <2 x i16> %add, <2 x i16> addrspace(1)* %out
ret void


@ -100,8 +100,8 @@ define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %p
; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA: flat_load_dword v{{[0-9]+}}, v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #0 {
%stof = addrspacecast i32 addrspace(2)* %ptr to i32*
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #0 {
%stof = addrspacecast i32 addrspace(4)* %ptr to i32*
%ld = load volatile i32, i32* %stof
ret void
}
@ -160,8 +160,8 @@ define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #0 {
; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}, s[4:5], 0x0
; HSA: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}}, 0x0
define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #0 {
%ftos = addrspacecast i32* %ptr to i32 addrspace(2)*
load volatile i32, i32 addrspace(2)* %ftos
%ftos = addrspacecast i32* %ptr to i32 addrspace(4)*
load volatile i32, i32 addrspace(4)* %ftos
ret void
}
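
Casts between constant and flat stay cheap here because both are 64-bit pointers under the new layout (p:64:64 and p4:64:64), so the cast only re-tags the address space; a sketch of the pattern the checks above expect:

define amdgpu_kernel void @const_to_flat_demo(i32 addrspace(4)* %p) {
  ; no width change between addrspace(4) and flat, so no conversion code:
  ; the kernel argument is loaded and then used directly by a flat load
  %f = addrspacecast i32 addrspace(4)* %p to i32*
  %v = load volatile i32, i32* %f
  ret void
}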


@ -4,9 +4,9 @@
; This test just checks that the compiler doesn't crash.
; FUNC-LABEL: {{^}}v32i8_to_v8i32:
define amdgpu_ps float @v32i8_to_v8i32(<32 x i8> addrspace(2)* inreg) #0 {
define amdgpu_ps float @v32i8_to_v8i32(<32 x i8> addrspace(4)* inreg) #0 {
entry:
%1 = load <32 x i8>, <32 x i8> addrspace(2)* %0
%1 = load <32 x i8>, <32 x i8> addrspace(4)* %0
%2 = bitcast <32 x i8> %1 to <8 x i32>
%3 = extractelement <8 x i32> %2, i32 1
%4 = icmp ne i32 %3, 0


@ -48,12 +48,12 @@
; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0
; HSAOPT: [[DISPATCH_PTR:%[0-9]+]] = call noalias nonnull dereferenceable(64) i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
; HSAOPT: [[CAST_DISPATCH_PTR:%[0-9]+]] = bitcast i8 addrspace(2)* [[DISPATCH_PTR]] to i32 addrspace(2)*
; HSAOPT: [[GEP0:%[0-9]+]] = getelementptr inbounds i32, i32 addrspace(2)* [[CAST_DISPATCH_PTR]], i64 1
; HSAOPT: [[LDXY:%[0-9]+]] = load i32, i32 addrspace(2)* [[GEP0]], align 4, !invariant.load !0
; HSAOPT: [[GEP1:%[0-9]+]] = getelementptr inbounds i32, i32 addrspace(2)* [[CAST_DISPATCH_PTR]], i64 2
; HSAOPT: [[LDZU:%[0-9]+]] = load i32, i32 addrspace(2)* [[GEP1]], align 4, !range !1, !invariant.load !0
; HSAOPT: [[DISPATCH_PTR:%[0-9]+]] = call noalias nonnull dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
; HSAOPT: [[CAST_DISPATCH_PTR:%[0-9]+]] = bitcast i8 addrspace(4)* [[DISPATCH_PTR]] to i32 addrspace(4)*
; HSAOPT: [[GEP0:%[0-9]+]] = getelementptr inbounds i32, i32 addrspace(4)* [[CAST_DISPATCH_PTR]], i64 1
; HSAOPT: [[LDXY:%[0-9]+]] = load i32, i32 addrspace(4)* [[GEP0]], align 4, !invariant.load !0
; HSAOPT: [[GEP1:%[0-9]+]] = getelementptr inbounds i32, i32 addrspace(4)* [[CAST_DISPATCH_PTR]], i64 2
; HSAOPT: [[LDZU:%[0-9]+]] = load i32, i32 addrspace(4)* [[GEP1]], align 4, !range !1, !invariant.load !0
; HSAOPT: [[EXTRACTY:%[0-9]+]] = lshr i32 [[LDXY]], 16
; HSAOPT: [[WORKITEM_ID_X:%[0-9]+]] = call i32 @llvm.amdgcn.workitem.id.x(), !range !2


@ -8,10 +8,10 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
declare i32 @llvm.amdgcn.workitem.id.z() #0
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
declare i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
declare i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
declare i64 @llvm.amdgcn.dispatch.id() #0
; HSA: define void @use_workitem_id_x() #1 {
@ -58,15 +58,15 @@ define void @use_workgroup_id_z() #1 {
; HSA: define void @use_dispatch_ptr() #7 {
define void @use_dispatch_ptr() #1 {
%dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
store volatile i8 addrspace(2)* %dispatch.ptr, i8 addrspace(2)* addrspace(1)* undef
%dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
store volatile i8 addrspace(4)* %dispatch.ptr, i8 addrspace(4)* addrspace(1)* undef
ret void
}
; HSA: define void @use_queue_ptr() #8 {
define void @use_queue_ptr() #1 {
%queue.ptr = call i8 addrspace(2)* @llvm.amdgcn.queue.ptr()
store volatile i8 addrspace(2)* %queue.ptr, i8 addrspace(2)* addrspace(1)* undef
%queue.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
store volatile i8 addrspace(4)* %queue.ptr, i8 addrspace(4)* addrspace(1)* undef
ret void
}
@ -186,22 +186,22 @@ define void @call_recursive_use_workitem_id_y() #1 {
; HSA: define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #8 {
define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 {
%stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
store volatile i32 0, i32 addrspace(4)* %stof
%stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(2)*
store volatile i32 0, i32 addrspace(2)* %stof
ret void
}
; HSA: define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #12 {
define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #2 {
%stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
store volatile i32 0, i32 addrspace(4)* %stof
%stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(2)*
store volatile i32 0, i32 addrspace(2)* %stof
ret void
}
; HSA: define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* %ptr) #13 {
define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* %ptr) #2 {
%stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
store volatile i32 0, i32 addrspace(4)* %stof
%stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(2)*
store volatile i32 0, i32 addrspace(2)* %stof
call void @func_indirect_use_queue_ptr()
ret void
}
@ -226,8 +226,8 @@ define void @indirect_use_group_to_flat_addrspacecast_queue_ptr_gfx9() #1 {
; HSA: define void @use_kernarg_segment_ptr() #14 {
define void @use_kernarg_segment_ptr() #1 {
%kernarg.segment.ptr = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
store volatile i8 addrspace(2)* %kernarg.segment.ptr, i8 addrspace(2)* addrspace(1)* undef
%kernarg.segment.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
store volatile i8 addrspace(4)* %kernarg.segment.ptr, i8 addrspace(4)* addrspace(1)* undef
ret void
}
@ -239,15 +239,15 @@ define void @func_indirect_use_kernarg_segment_ptr() #1 {
; HSA: define amdgpu_kernel void @kern_use_implicitarg_ptr() #15 {
define amdgpu_kernel void @kern_use_implicitarg_ptr() #1 {
%implicitarg.ptr = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
store volatile i8 addrspace(2)* %implicitarg.ptr, i8 addrspace(2)* addrspace(1)* undef
%implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
store volatile i8 addrspace(4)* %implicitarg.ptr, i8 addrspace(4)* addrspace(1)* undef
ret void
}
; HSA: define void @use_implicitarg_ptr() #15 {
define void @use_implicitarg_ptr() #1 {
%implicitarg.ptr = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
store volatile i8 addrspace(2)* %implicitarg.ptr, i8 addrspace(2)* addrspace(1)* undef
%implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
store volatile i8 addrspace(4)* %implicitarg.ptr, i8 addrspace(4)* addrspace(1)* undef
ret void
}


@ -8,9 +8,9 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
declare i32 @llvm.amdgcn.workitem.id.z() #0
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
declare i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
declare i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
; HSA: define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
@ -149,27 +149,27 @@ define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #1 {
; HSA: define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #10 {
define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 {
%dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
%bc = bitcast i8 addrspace(2)* %dispatch.ptr to i32 addrspace(2)*
%val = load i32, i32 addrspace(2)* %bc
%dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
%bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
%val = load i32, i32 addrspace(4)* %bc
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
; HSA: define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #11 {
define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #1 {
%dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.queue.ptr()
%bc = bitcast i8 addrspace(2)* %dispatch.ptr to i32 addrspace(2)*
%val = load i32, i32 addrspace(2)* %bc
%dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
%bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
%val = load i32, i32 addrspace(4)* %bc
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
; HSA: define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #12 {
define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #1 {
%dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
%bc = bitcast i8 addrspace(2)* %dispatch.ptr to i32 addrspace(2)*
%val = load i32, i32 addrspace(2)* %bc
%dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
%bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
%val = load i32, i32 addrspace(4)* %bc
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
@ -210,9 +210,9 @@ define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %p
ret void
}
; HSA: define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #1 {
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #1 {
%stof = addrspacecast i32 addrspace(2)* %ptr to i32*
; HSA: define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #1 {
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #1 {
%stof = addrspacecast i32 addrspace(4)* %ptr to i32*
%ld = load volatile i32, i32* %stof
ret void
}
@ -226,8 +226,8 @@ define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 {
; HSA: define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 {
%ftos = addrspacecast i32* %ptr to i32 addrspace(2)*
%ld = load volatile i32, i32 addrspace(2)* %ftos
%ftos = addrspacecast i32* %ptr to i32 addrspace(4)*
%ld = load volatile i32, i32 addrspace(4)* %ftos
ret void
}


@ -358,7 +358,7 @@ bb0:
br i1 %cmp0, label %bb2, label %bb1
bb1:
%val = load volatile i32, i32 addrspace(2)* undef
%val = load volatile i32, i32 addrspace(4)* undef
%cmp1 = icmp eq i32 %val, 3
br i1 %cmp1, label %bb3, label %bb2


@ -345,7 +345,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
; GCN: s_waitcnt
; GCN-NEXT: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
%ptr = load <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(2)* undef
%ptr = load <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef
%val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr
call void @external_void_func_v8i32(<8 x i32> %val)
ret void
@ -359,7 +359,7 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
; GCN: s_waitcnt
; GCN-NEXT: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
%ptr = load <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(2)* undef
%ptr = load <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef
%val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr
call void @external_void_func_v16i32(<16 x i32> %val)
ret void
@ -377,7 +377,7 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
; GCN: s_waitcnt
; GCN-NEXT: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
%ptr = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(2)* undef
%ptr = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
%val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr
call void @external_void_func_v32i32(<32 x i32> %val)
ret void
@ -405,7 +405,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
; GCN: s_swappc_b64
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
%ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(2)* undef
%ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
%val0 = load <32 x i32>, <32 x i32> addrspace(1)* %ptr0
%val1 = load i32, i32 addrspace(1)* undef
call void @external_void_func_v32i32_i32(<32 x i32> %val0, i32 %val1)
@ -430,7 +430,7 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(i32 addrspace(1)*
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
%ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(2)* undef
%ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef
%val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0
call void @external_void_func_struct_i8_i32({ i8, i32 } %val)
ret void
@ -516,7 +516,7 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval
; GCN-LABEL: {{^}}test_call_external_void_func_v16i8:
define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
%ptr = load <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(2)* undef
%ptr = load <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef
%val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
call void @external_void_func_v16i8(<16 x i8> %val)
ret void


@ -4,9 +4,9 @@
; GCN-LABEL: {{^}}use_dispatch_ptr:
; GCN: s_load_dword s{{[0-9]+}}, s[6:7], 0x0
define void @use_dispatch_ptr() #1 {
%dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
%header_ptr = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
%value = load volatile i32, i32 addrspace(2)* %header_ptr
%dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
%header_ptr = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
%value = load volatile i32, i32 addrspace(4)* %header_ptr
ret void
}
@ -21,9 +21,9 @@ define amdgpu_kernel void @kern_indirect_use_dispatch_ptr(i32) #1 {
; GCN-LABEL: {{^}}use_queue_ptr:
; GCN: s_load_dword s{{[0-9]+}}, s[6:7], 0x0
define void @use_queue_ptr() #1 {
%queue_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
%header_ptr = bitcast i8 addrspace(2)* %queue_ptr to i32 addrspace(2)*
%value = load volatile i32, i32 addrspace(2)* %header_ptr
%queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
%header_ptr = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
%value = load volatile i32, i32 addrspace(4)* %header_ptr
ret void
}
@ -62,9 +62,9 @@ define amdgpu_kernel void @kern_indirect_use_queue_ptr_addrspacecast(i32) #1 {
; GCN-LABEL: {{^}}use_kernarg_segment_ptr:
; GCN: s_load_dword s{{[0-9]+}}, s[6:7], 0x0
define void @use_kernarg_segment_ptr() #1 {
%kernarg_segment_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
%header_ptr = bitcast i8 addrspace(2)* %kernarg_segment_ptr to i32 addrspace(2)*
%value = load volatile i32, i32 addrspace(2)* %header_ptr
%kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
%header_ptr = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
%value = load volatile i32, i32 addrspace(4)* %header_ptr
ret void
}
@ -435,17 +435,17 @@ define void @use_every_sgpr_input() #1 {
%alloca = alloca i32, align 4, addrspace(5)
store volatile i32 0, i32 addrspace(5)* %alloca
%dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
%dispatch_ptr.bc = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
%val0 = load volatile i32, i32 addrspace(2)* %dispatch_ptr.bc
%dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
%dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
%val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc
%queue_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
%queue_ptr.bc = bitcast i8 addrspace(2)* %queue_ptr to i32 addrspace(2)*
%val1 = load volatile i32, i32 addrspace(2)* %queue_ptr.bc
%queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
%queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
%val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc
%kernarg_segment_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
%kernarg_segment_ptr.bc = bitcast i8 addrspace(2)* %kernarg_segment_ptr to i32 addrspace(2)*
%val2 = load volatile i32, i32 addrspace(2)* %kernarg_segment_ptr.bc
%kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
%kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
%val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc
%val3 = call i64 @llvm.amdgcn.dispatch.id()
call void asm sideeffect "; use $0", "s"(i64 %val3)
@ -515,17 +515,17 @@ define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
%alloca = alloca i32, align 4, addrspace(5)
store volatile i32 0, i32 addrspace(5)* %alloca
%dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
%dispatch_ptr.bc = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
%val0 = load volatile i32, i32 addrspace(2)* %dispatch_ptr.bc
%dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
%dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
%val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc
%queue_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
%queue_ptr.bc = bitcast i8 addrspace(2)* %queue_ptr to i32 addrspace(2)*
%val1 = load volatile i32, i32 addrspace(2)* %queue_ptr.bc
%queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
%queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
%val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc
%kernarg_segment_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
%kernarg_segment_ptr.bc = bitcast i8 addrspace(2)* %kernarg_segment_ptr to i32 addrspace(2)*
%val2 = load volatile i32, i32 addrspace(2)* %kernarg_segment_ptr.bc
%kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
%kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
%val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc
%val3 = call i64 @llvm.amdgcn.dispatch.id()
call void asm sideeffect "; use $0", "s"(i64 %val3)
@ -573,17 +573,17 @@ define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill() #1 {
store volatile i32 0, i32 addrspace(5)* %alloca
%dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
%dispatch_ptr.bc = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
%val0 = load volatile i32, i32 addrspace(2)* %dispatch_ptr.bc
%dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
%dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
%val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc
%queue_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
%queue_ptr.bc = bitcast i8 addrspace(2)* %queue_ptr to i32 addrspace(2)*
%val1 = load volatile i32, i32 addrspace(2)* %queue_ptr.bc
%queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
%queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
%val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc
%kernarg_segment_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
%kernarg_segment_ptr.bc = bitcast i8 addrspace(2)* %kernarg_segment_ptr to i32 addrspace(2)*
%val2 = load volatile i32, i32 addrspace(2)* %kernarg_segment_ptr.bc
%kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
%kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
%val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc
%val3 = call i64 @llvm.amdgcn.dispatch.id()
call void asm sideeffect "; use $0", "s"(i64 %val3)
@ -603,10 +603,10 @@ define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill() #1 {
declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0
declare noalias i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
declare noalias i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
declare i64 @llvm.amdgcn.dispatch.id() #0
declare noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind noinline }


@ -87,12 +87,12 @@ define amdgpu_kernel void @test_sink_noop_addrspacecast_flat_to_constant_i32(i32
entry:
%out.gep = getelementptr i32, i32* %out, i64 999999
%in.gep = getelementptr i32, i32* %in, i64 7
%cast = addrspacecast i32* %in.gep to i32 addrspace(2)*
%cast = addrspacecast i32* %in.gep to i32 addrspace(4)*
%tmp0 = icmp eq i32 %cond, 0
br i1 %tmp0, label %endif, label %if
if:
%tmp1 = load i32, i32 addrspace(2)* %cast
%tmp1 = load i32, i32 addrspace(4)* %cast
br label %endif
endif:


@ -268,23 +268,23 @@ done:
}
; OPT-LABEL: @test_sink_constant_small_offset_i32
; OPT-NOT: getelementptr i32, i32 addrspace(2)*
; OPT-NOT: getelementptr i32, i32 addrspace(4)*
; OPT: br i1
; GCN-LABEL: {{^}}test_sink_constant_small_offset_i32:
; GCN: s_and_saveexec_b64
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x7{{$}}
; GCN: s_or_b64 exec, exec
define amdgpu_kernel void @test_sink_constant_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
define amdgpu_kernel void @test_sink_constant_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
%in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 7
%in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
if:
%tmp1 = load i32, i32 addrspace(2)* %in.gep
%tmp1 = load i32, i32 addrspace(4)* %in.gep
br label %endif
endif:
@ -297,23 +297,23 @@ done:
}
; OPT-LABEL: @test_sink_constant_max_8_bit_offset_i32
; OPT-NOT: getelementptr i32, i32 addrspace(2)*
; OPT-NOT: getelementptr i32, i32 addrspace(4)*
; OPT: br i1
; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_i32:
; GCN: s_and_saveexec_b64
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xff{{$}}
; GCN: s_or_b64 exec, exec
define amdgpu_kernel void @test_sink_constant_max_8_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
define amdgpu_kernel void @test_sink_constant_max_8_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
%in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 255
%in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 255
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
if:
%tmp1 = load i32, i32 addrspace(2)* %in.gep
%tmp1 = load i32, i32 addrspace(4)* %in.gep
br label %endif
endif:
@ -326,9 +326,9 @@ done:
}
; OPT-LABEL: @test_sink_constant_max_8_bit_offset_p1_i32
; OPT-SI: getelementptr i32, i32 addrspace(2)*
; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT-VI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT-SI: getelementptr i32, i32 addrspace(4)*
; OPT-CI-NOT: getelementptr i32, i32 addrspace(4)*
; OPT-VI-NOT: getelementptr i32, i32 addrspace(4)*
; OPT: br i1
; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_p1_i32:
@ -337,16 +337,16 @@ done:
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
; GCN: s_or_b64 exec, exec
define amdgpu_kernel void @test_sink_constant_max_8_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
define amdgpu_kernel void @test_sink_constant_max_8_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
%in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 256
%in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 256
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
if:
%tmp1 = load i32, i32 addrspace(2)* %in.gep
%tmp1 = load i32, i32 addrspace(4)* %in.gep
br label %endif
endif:
@ -359,8 +359,8 @@ done:
}
; OPT-LABEL: @test_sink_constant_max_32_bit_offset_i32
; OPT-SI: getelementptr i32, i32 addrspace(2)*
; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT-SI: getelementptr i32, i32 addrspace(4)*
; OPT-CI-NOT: getelementptr i32, i32 addrspace(4)*
; OPT: br i1
; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_i32:
@ -369,16 +369,16 @@ done:
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}}
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; GCN: s_or_b64 exec, exec
define amdgpu_kernel void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
define amdgpu_kernel void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
%in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 4294967295
%in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 4294967295
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
if:
%tmp1 = load i32, i32 addrspace(2)* %in.gep
%tmp1 = load i32, i32 addrspace(4)* %in.gep
br label %endif
endif:
@ -391,7 +391,7 @@ done:
}
; OPT-LABEL: @test_sink_constant_max_32_bit_offset_p1_i32
; OPT: getelementptr i32, i32 addrspace(2)*
; OPT: getelementptr i32, i32 addrspace(4)*
; OPT: br i1
; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_p1_i32:
@ -400,16 +400,16 @@ done:
; GCN: s_addc_u32
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; GCN: s_or_b64 exec, exec
define amdgpu_kernel void @test_sink_constant_max_32_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
define amdgpu_kernel void @test_sink_constant_max_32_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
%in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 17179869181
%in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 17179869181
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
if:
%tmp1 = load i32, i32 addrspace(2)* %in.gep
%tmp1 = load i32, i32 addrspace(4)* %in.gep
br label %endif
endif:
@ -430,16 +430,16 @@ done:
; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xffffc{{$}}
; GCN: s_or_b64 exec, exec
define amdgpu_kernel void @test_sink_constant_max_20_bit_byte_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
define amdgpu_kernel void @test_sink_constant_max_20_bit_byte_offset_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
%in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262143
%in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 262143
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
if:
%tmp1 = load i32, i32 addrspace(2)* %in.gep
%tmp1 = load i32, i32 addrspace(4)* %in.gep
br label %endif
endif:
@ -452,9 +452,9 @@ done:
}
; OPT-LABEL: @test_sink_constant_max_20_bit_byte_offset_p1_i32
; OPT-SI: getelementptr i32, i32 addrspace(2)*
; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT-VI: getelementptr i32, i32 addrspace(2)*
; OPT-SI: getelementptr i32, i32 addrspace(4)*
; OPT-CI-NOT: getelementptr i32, i32 addrspace(4)*
; OPT-VI: getelementptr i32, i32 addrspace(4)*
; OPT: br i1
; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_p1_i32:
@ -468,16 +468,16 @@ done:
; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
; GCN: s_or_b64 exec, exec
define amdgpu_kernel void @test_sink_constant_max_20_bit_byte_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
define amdgpu_kernel void @test_sink_constant_max_20_bit_byte_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
%in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262144
%in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 262144
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
if:
%tmp1 = load i32, i32 addrspace(2)* %in.gep
%tmp1 = load i32, i32 addrspace(4)* %in.gep
br label %endif
endif:
@ -524,17 +524,17 @@ bb34:
; OPT: br i1 %tmp0,
; OPT: if:
; OPT: getelementptr i8, {{.*}} 4095
define amdgpu_kernel void @test_sink_constant_small_max_mubuf_offset_load_i32_align_1(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
define amdgpu_kernel void @test_sink_constant_small_max_mubuf_offset_load_i32_align_1(i32 addrspace(1)* %out, i8 addrspace(4)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024
%in.gep = getelementptr i8, i8 addrspace(2)* %in, i64 4095
%in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 4095
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
if:
%bitcast = bitcast i8 addrspace(2)* %in.gep to i32 addrspace(2)*
%tmp1 = load i32, i32 addrspace(2)* %bitcast, align 1
%bitcast = bitcast i8 addrspace(4)* %in.gep to i32 addrspace(4)*
%tmp1 = load i32, i32 addrspace(4)* %bitcast, align 1
br label %endif
endif:


@ -32,9 +32,9 @@ endif:
; GCN: v_add_f64
; GCN: v_cndmask_b32_e32
; GCN: v_cndmask_b32_e32
define amdgpu_kernel void @test_vccnz_sgpr_ifcvt_triangle64(double addrspace(1)* %out, double addrspace(2)* %in) #0 {
define amdgpu_kernel void @test_vccnz_sgpr_ifcvt_triangle64(double addrspace(1)* %out, double addrspace(4)* %in) #0 {
entry:
%v = load double, double addrspace(2)* %in
%v = load double, double addrspace(4)* %in
%cc = fcmp oeq double %v, 1.000000e+00
br i1 %cc, label %if, label %endif


@ -187,9 +187,9 @@ endif:
; GCN: [[ENDIF]]:
; GCN: buffer_store_dword
define amdgpu_kernel void @test_vccnz_sgpr_ifcvt_triangle(i32 addrspace(1)* %out, i32 addrspace(2)* %in, float %cnd) #0 {
define amdgpu_kernel void @test_vccnz_sgpr_ifcvt_triangle(i32 addrspace(1)* %out, i32 addrspace(4)* %in, float %cnd) #0 {
entry:
%v = load i32, i32 addrspace(2)* %in
%v = load i32, i32 addrspace(4)* %in
%cc = fcmp oeq float %cnd, 1.000000e+00
br i1 %cc, label %if, label %endif
@ -206,9 +206,9 @@ endif:
; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle_constant_load:
; GCN: v_cndmask_b32
define amdgpu_kernel void @test_vccnz_ifcvt_triangle_constant_load(float addrspace(1)* %out, float addrspace(2)* %in) #0 {
define amdgpu_kernel void @test_vccnz_ifcvt_triangle_constant_load(float addrspace(1)* %out, float addrspace(4)* %in) #0 {
entry:
%v = load float, float addrspace(2)* %in
%v = load float, float addrspace(4)* %in
%cc = fcmp oeq float %v, 1.000000e+00
br i1 %cc, label %if, label %endif
@ -248,9 +248,9 @@ endif:
; GCN: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], [[VAL]]
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 1
; GCN-NEXT: s_cselect_b32 [[SELECT:s[0-9]+]], [[ADD]], [[VAL]]
define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle(i32 addrspace(2)* %in, i32 %cond) #0 {
define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle(i32 addrspace(4)* %in, i32 %cond) #0 {
entry:
%v = load i32, i32 addrspace(2)* %in
%v = load i32, i32 addrspace(4)* %in
%cc = icmp eq i32 %cond, 1
br i1 %cc, label %if, label %endif
@ -295,9 +295,9 @@ endif:
; GCN: s_addc_u32
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 1
; GCN-NEXT: s_cselect_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle64(i64 addrspace(2)* %in, i32 %cond) #0 {
define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle64(i64 addrspace(4)* %in, i32 %cond) #0 {
entry:
%v = load i64, i64 addrspace(2)* %in
%v = load i64, i64 addrspace(4)* %in
%cc = icmp eq i32 %cond, 1
br i1 %cc, label %if, label %endif
@ -320,9 +320,9 @@ endif:
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 1
; GCN-NEXT: s_cselect_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}
; GCN-NEXT: s_cselect_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle96(<3 x i32> addrspace(2)* %in, i32 %cond) #0 {
define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle96(<3 x i32> addrspace(4)* %in, i32 %cond) #0 {
entry:
%v = load <3 x i32>, <3 x i32> addrspace(2)* %in
%v = load <3 x i32>, <3 x i32> addrspace(4)* %in
%cc = icmp eq i32 %cond, 1
br i1 %cc, label %if, label %endif
@ -345,9 +345,9 @@ endif:
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 1
; GCN-NEXT: s_cselect_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}
; GCN-NEXT: s_cselect_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle128(<4 x i32> addrspace(2)* %in, i32 %cond) #0 {
define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle128(<4 x i32> addrspace(4)* %in, i32 %cond) #0 {
entry:
%v = load <4 x i32>, <4 x i32> addrspace(2)* %in
%v = load <4 x i32>, <4 x i32> addrspace(4)* %in
%cc = icmp eq i32 %cond, 1
br i1 %cc, label %if, label %endif


@ -8,8 +8,8 @@
; GCN-DAG: v_mov_b32_e32 [[VELT1:v[0-9]+]], [[ELT1]]
; GCN-DAG: buffer_store_short [[VELT0]]
; GCN-DAG: buffer_store_short [[VELT1]]
define amdgpu_kernel void @extract_vector_elt_v2f16(half addrspace(1)* %out, <2 x half> addrspace(2)* %vec.ptr) #0 {
%vec = load <2 x half>, <2 x half> addrspace(2)* %vec.ptr
define amdgpu_kernel void @extract_vector_elt_v2f16(half addrspace(1)* %out, <2 x half> addrspace(4)* %vec.ptr) #0 {
%vec = load <2 x half>, <2 x half> addrspace(4)* %vec.ptr
%p0 = extractelement <2 x half> %vec, i32 0
%p1 = extractelement <2 x half> %vec, i32 1
%out1 = getelementptr half, half addrspace(1)* %out, i32 10
@ -26,8 +26,8 @@ define amdgpu_kernel void @extract_vector_elt_v2f16(half addrspace(1)* %out, <2
; GCN: v_mov_b32_e32 [[VELT1:v[0-9]+]], [[ELT1]]
; GCN: buffer_store_short [[VELT1]]
; GCN: ScratchSize: 0
define amdgpu_kernel void @extract_vector_elt_v2f16_dynamic_sgpr(half addrspace(1)* %out, <2 x half> addrspace(2)* %vec.ptr, i32 %idx) #0 {
%vec = load <2 x half>, <2 x half> addrspace(2)* %vec.ptr
define amdgpu_kernel void @extract_vector_elt_v2f16_dynamic_sgpr(half addrspace(1)* %out, <2 x half> addrspace(4)* %vec.ptr, i32 %idx) #0 {
%vec = load <2 x half>, <2 x half> addrspace(4)* %vec.ptr
%elt = extractelement <2 x half> %vec, i32 %idx
store half %elt, half addrspace(1)* %out, align 2
ret void
@ -45,12 +45,12 @@ define amdgpu_kernel void @extract_vector_elt_v2f16_dynamic_sgpr(half addrspace(
; SI: buffer_store_short [[ELT]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[ELT]]
; GCN: ScratchSize: 0{{$}}
define amdgpu_kernel void @extract_vector_elt_v2f16_dynamic_vgpr(half addrspace(1)* %out, <2 x half> addrspace(2)* %vec.ptr, i32 addrspace(1)* %idx.ptr) #0 {
define amdgpu_kernel void @extract_vector_elt_v2f16_dynamic_vgpr(half addrspace(1)* %out, <2 x half> addrspace(4)* %vec.ptr, i32 addrspace(1)* %idx.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds i32, i32 addrspace(1)* %idx.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
%vec = load <2 x half>, <2 x half> addrspace(2)* %vec.ptr
%vec = load <2 x half>, <2 x half> addrspace(4)* %vec.ptr
%idx = load i32, i32 addrspace(1)* %gep
%elt = extractelement <2 x half> %vec, i32 %idx
store half %elt, half addrspace(1)* %out.gep, align 2


@ -9,8 +9,8 @@
; GCN-DAG: v_mov_b32_e32 [[VELT1:v[0-9]+]], [[ELT1]]
; GCN-DAG: buffer_store_short [[VELT0]]
; GCN-DAG: buffer_store_short [[VELT1]]
define amdgpu_kernel void @extract_vector_elt_v2i16(i16 addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr) #0 {
%vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
define amdgpu_kernel void @extract_vector_elt_v2i16(i16 addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr) #0 {
%vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
%p0 = extractelement <2 x i16> %vec, i32 0
%p1 = extractelement <2 x i16> %vec, i32 1
%out1 = getelementptr i16, i16 addrspace(1)* %out, i32 10
@ -27,8 +27,8 @@ define amdgpu_kernel void @extract_vector_elt_v2i16(i16 addrspace(1)* %out, <2 x
; GCN: v_mov_b32_e32 [[VELT1:v[0-9]+]], [[ELT1]]
; GCN: buffer_store_short [[VELT1]]
; GCN: ScratchSize: 0
define amdgpu_kernel void @extract_vector_elt_v2i16_dynamic_sgpr(i16 addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr, i32 %idx) #0 {
%vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
define amdgpu_kernel void @extract_vector_elt_v2i16_dynamic_sgpr(i16 addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr, i32 %idx) #0 {
%vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
%elt = extractelement <2 x i16> %vec, i32 %idx
store i16 %elt, i16 addrspace(1)* %out, align 2
ret void
@ -45,13 +45,13 @@ define amdgpu_kernel void @extract_vector_elt_v2i16_dynamic_sgpr(i16 addrspace(1
; SI: buffer_store_short [[ELT]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[ELT]]
; GCN: ScratchSize: 0{{$}}
define amdgpu_kernel void @extract_vector_elt_v2i16_dynamic_vgpr(i16 addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr, i32 addrspace(1)* %idx.ptr) #0 {
define amdgpu_kernel void @extract_vector_elt_v2i16_dynamic_vgpr(i16 addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr, i32 addrspace(1)* %idx.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds i32, i32 addrspace(1)* %idx.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 %tid.ext
%idx = load volatile i32, i32 addrspace(1)* %gep
%vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
%vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
%elt = extractelement <2 x i16> %vec, i32 %idx
store i16 %elt, i16 addrspace(1)* %out.gep, align 2
ret void


@ -1,8 +1,8 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx803 -enable-si-insert-waitcnts=1 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
; RUN: llvm-as -data-layout=A5 < %s | llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx803 -enable-si-insert-waitcnts=1 -verify-machineinstrs | FileCheck --check-prefix=GCN %s
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
declare i32 @llvm.amdgcn.workitem.id.x()
declare i32 @llvm.amdgcn.workgroup.id.x()
declare void @llvm.amdgcn.s.barrier()
@ -34,19 +34,19 @@ define amdgpu_kernel void @test_local(i32 addrspace(1)*) {
fence syncscope("workgroup") acquire
%8 = load i32, i32 addrspace(3)* getelementptr inbounds ([1 x i32], [1 x i32] addrspace(3)* @test_local.temp, i64 0, i64 0), align 4
%9 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %2, align 4
%10 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
%10 = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
%11 = call i32 @llvm.amdgcn.workitem.id.x()
%12 = call i32 @llvm.amdgcn.workgroup.id.x()
%13 = getelementptr inbounds i8, i8 addrspace(2)* %10, i64 4
%14 = bitcast i8 addrspace(2)* %13 to i16 addrspace(2)*
%15 = load i16, i16 addrspace(2)* %14, align 4
%13 = getelementptr inbounds i8, i8 addrspace(4)* %10, i64 4
%14 = bitcast i8 addrspace(4)* %13 to i16 addrspace(4)*
%15 = load i16, i16 addrspace(4)* %14, align 4
%16 = zext i16 %15 to i32
%17 = mul i32 %12, %16
%18 = add i32 %17, %11
%19 = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
%19 = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%20 = zext i32 %18 to i64
%21 = bitcast i8 addrspace(2)* %19 to i64 addrspace(2)*
%22 = load i64, i64 addrspace(2)* %21, align 8
%21 = bitcast i8 addrspace(4)* %19 to i64 addrspace(4)*
%22 = load i64, i64 addrspace(4)* %21, align 8
%23 = add i64 %22, %20
%24 = getelementptr inbounds i32, i32 addrspace(1)* %9, i64 %23
store i32 %8, i32 addrspace(1)* %24, align 4
@ -68,56 +68,56 @@ define amdgpu_kernel void @test_global(i32 addrspace(1)*) {
; <label>:4: ; preds = %58, %1
%5 = load i32, i32 addrspace(5)* %3, align 4
%6 = sext i32 %5 to i64
%7 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
%7 = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
%8 = call i32 @llvm.amdgcn.workitem.id.x()
%9 = call i32 @llvm.amdgcn.workgroup.id.x()
%10 = getelementptr inbounds i8, i8 addrspace(2)* %7, i64 4
%11 = bitcast i8 addrspace(2)* %10 to i16 addrspace(2)*
%12 = load i16, i16 addrspace(2)* %11, align 4
%10 = getelementptr inbounds i8, i8 addrspace(4)* %7, i64 4
%11 = bitcast i8 addrspace(4)* %10 to i16 addrspace(4)*
%12 = load i16, i16 addrspace(4)* %11, align 4
%13 = zext i16 %12 to i32
%14 = mul i32 %9, %13
%15 = add i32 %14, %8
%16 = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
%16 = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%17 = zext i32 %15 to i64
%18 = bitcast i8 addrspace(2)* %16 to i64 addrspace(2)*
%19 = load i64, i64 addrspace(2)* %18, align 8
%18 = bitcast i8 addrspace(4)* %16 to i64 addrspace(4)*
%19 = load i64, i64 addrspace(4)* %18, align 8
%20 = add i64 %19, %17
%21 = icmp ult i64 %6, %20
br i1 %21, label %22, label %61
; <label>:22: ; preds = %4
%23 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
%23 = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
%24 = call i32 @llvm.amdgcn.workitem.id.x()
%25 = call i32 @llvm.amdgcn.workgroup.id.x()
%26 = getelementptr inbounds i8, i8 addrspace(2)* %23, i64 4
%27 = bitcast i8 addrspace(2)* %26 to i16 addrspace(2)*
%28 = load i16, i16 addrspace(2)* %27, align 4
%26 = getelementptr inbounds i8, i8 addrspace(4)* %23, i64 4
%27 = bitcast i8 addrspace(4)* %26 to i16 addrspace(4)*
%28 = load i16, i16 addrspace(4)* %27, align 4
%29 = zext i16 %28 to i32
%30 = mul i32 %25, %29
%31 = add i32 %30, %24
%32 = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
%32 = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%33 = zext i32 %31 to i64
%34 = bitcast i8 addrspace(2)* %32 to i64 addrspace(2)*
%35 = load i64, i64 addrspace(2)* %34, align 8
%34 = bitcast i8 addrspace(4)* %32 to i64 addrspace(4)*
%35 = load i64, i64 addrspace(4)* %34, align 8
%36 = add i64 %35, %33
%37 = add i64 %36, 2184
%38 = trunc i64 %37 to i32
%39 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %2, align 4
%40 = load i32, i32 addrspace(5)* %3, align 4
%41 = sext i32 %40 to i64
%42 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
%42 = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
%43 = call i32 @llvm.amdgcn.workitem.id.x()
%44 = call i32 @llvm.amdgcn.workgroup.id.x()
%45 = getelementptr inbounds i8, i8 addrspace(2)* %42, i64 4
%46 = bitcast i8 addrspace(2)* %45 to i16 addrspace(2)*
%47 = load i16, i16 addrspace(2)* %46, align 4
%45 = getelementptr inbounds i8, i8 addrspace(4)* %42, i64 4
%46 = bitcast i8 addrspace(4)* %45 to i16 addrspace(4)*
%47 = load i16, i16 addrspace(4)* %46, align 4
%48 = zext i16 %47 to i32
%49 = mul i32 %44, %48
%50 = add i32 %49, %43
%51 = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
%51 = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%52 = zext i32 %50 to i64
%53 = bitcast i8 addrspace(2)* %51 to i64 addrspace(2)*
%54 = load i64, i64 addrspace(2)* %53, align 8
%53 = bitcast i8 addrspace(4)* %51 to i64 addrspace(4)*
%54 = load i64, i64 addrspace(4)* %53, align 8
%55 = add i64 %54, %52
%56 = add i64 %41, %55
%57 = getelementptr inbounds i32, i32 addrspace(1)* %39, i64 %56
@ -147,19 +147,19 @@ define amdgpu_kernel void @test_global_local(i32 addrspace(1)*) {
%2 = alloca i32 addrspace(1)*, align 4, addrspace(5)
store i32 addrspace(1)* %0, i32 addrspace(1)* addrspace(5)* %2, align 4
%3 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %2, align 4
%4 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
%4 = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
%5 = call i32 @llvm.amdgcn.workitem.id.x()
%6 = call i32 @llvm.amdgcn.workgroup.id.x()
%7 = getelementptr inbounds i8, i8 addrspace(2)* %4, i64 4
%8 = bitcast i8 addrspace(2)* %7 to i16 addrspace(2)*
%9 = load i16, i16 addrspace(2)* %8, align 4
%7 = getelementptr inbounds i8, i8 addrspace(4)* %4, i64 4
%8 = bitcast i8 addrspace(4)* %7 to i16 addrspace(4)*
%9 = load i16, i16 addrspace(4)* %8, align 4
%10 = zext i16 %9 to i32
%11 = mul i32 %6, %10
%12 = add i32 %11, %5
%13 = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
%13 = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%14 = zext i32 %12 to i64
%15 = bitcast i8 addrspace(2)* %13 to i64 addrspace(2)*
%16 = load i64, i64 addrspace(2)* %15, align 8
%15 = bitcast i8 addrspace(4)* %13 to i64 addrspace(4)*
%16 = load i64, i64 addrspace(4)* %15, align 8
%17 = add i64 %16, %14
%18 = getelementptr inbounds i32, i32 addrspace(1)* %3, i64 %17
store i32 1, i32 addrspace(1)* %18, align 4
@ -178,19 +178,19 @@ define amdgpu_kernel void @test_global_local(i32 addrspace(1)*) {
fence syncscope("workgroup") acquire
%24 = load i32, i32 addrspace(3)* getelementptr inbounds ([1 x i32], [1 x i32] addrspace(3)* @test_global_local.temp, i64 0, i64 0), align 4
%25 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %2, align 4
%26 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
%26 = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
%27 = call i32 @llvm.amdgcn.workitem.id.x()
%28 = call i32 @llvm.amdgcn.workgroup.id.x()
%29 = getelementptr inbounds i8, i8 addrspace(2)* %26, i64 4
%30 = bitcast i8 addrspace(2)* %29 to i16 addrspace(2)*
%31 = load i16, i16 addrspace(2)* %30, align 4
%29 = getelementptr inbounds i8, i8 addrspace(4)* %26, i64 4
%30 = bitcast i8 addrspace(4)* %29 to i16 addrspace(4)*
%31 = load i16, i16 addrspace(4)* %30, align 4
%32 = zext i16 %31 to i32
%33 = mul i32 %28, %32
%34 = add i32 %33, %27
%35 = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
%35 = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%36 = zext i32 %34 to i64
%37 = bitcast i8 addrspace(2)* %35 to i64 addrspace(2)*
%38 = load i64, i64 addrspace(2)* %37, align 8
%37 = bitcast i8 addrspace(4)* %35 to i64 addrspace(4)*
%38 = load i64, i64 addrspace(4)* %37, align 8
%39 = add i64 %38, %36
%40 = getelementptr inbounds i32, i32 addrspace(1)* %25, i64 %39
store i32 %24, i32 addrspace(1)* %40, align 4

View File

@ -164,7 +164,7 @@ define <5 x i32> @v5i32_func_void() #0 {
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <8 x i32> @v8i32_func_void() #0 {
%ptr = load volatile <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(2)* undef
%ptr = load volatile <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef
%val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr
ret <8 x i32> %val
}
@ -177,7 +177,7 @@ define <8 x i32> @v8i32_func_void() #0 {
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <16 x i32> @v16i32_func_void() #0 {
%ptr = load volatile <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(2)* undef
%ptr = load volatile <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef
%val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr
ret <16 x i32> %val
}
@ -194,7 +194,7 @@ define <16 x i32> @v16i32_func_void() #0 {
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <32 x i32> @v32i32_func_void() #0 {
%ptr = load volatile <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(2)* undef
%ptr = load volatile <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
%val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr
ret <32 x i32> %val
}
@ -214,7 +214,7 @@ define <2 x i64> @v2i64_func_void() #0 {
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <3 x i64> @v3i64_func_void() #0 {
%ptr = load volatile <3 x i64> addrspace(1)*, <3 x i64> addrspace(1)* addrspace(2)* undef
%ptr = load volatile <3 x i64> addrspace(1)*, <3 x i64> addrspace(1)* addrspace(4)* undef
%val = load <3 x i64>, <3 x i64> addrspace(1)* %ptr
ret <3 x i64> %val
}
@ -225,7 +225,7 @@ define <3 x i64> @v3i64_func_void() #0 {
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <4 x i64> @v4i64_func_void() #0 {
%ptr = load volatile <4 x i64> addrspace(1)*, <4 x i64> addrspace(1)* addrspace(2)* undef
%ptr = load volatile <4 x i64> addrspace(1)*, <4 x i64> addrspace(1)* addrspace(4)* undef
%val = load <4 x i64>, <4 x i64> addrspace(1)* %ptr
ret <4 x i64> %val
}
@ -237,7 +237,7 @@ define <4 x i64> @v4i64_func_void() #0 {
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <5 x i64> @v5i64_func_void() #0 {
%ptr = load volatile <5 x i64> addrspace(1)*, <5 x i64> addrspace(1)* addrspace(2)* undef
%ptr = load volatile <5 x i64> addrspace(1)*, <5 x i64> addrspace(1)* addrspace(4)* undef
%val = load <5 x i64>, <5 x i64> addrspace(1)* %ptr
ret <5 x i64> %val
}
@ -250,7 +250,7 @@ define <5 x i64> @v5i64_func_void() #0 {
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <8 x i64> @v8i64_func_void() #0 {
%ptr = load volatile <8 x i64> addrspace(1)*, <8 x i64> addrspace(1)* addrspace(2)* undef
%ptr = load volatile <8 x i64> addrspace(1)*, <8 x i64> addrspace(1)* addrspace(4)* undef
%val = load <8 x i64>, <8 x i64> addrspace(1)* %ptr
ret <8 x i64> %val
}
@ -267,7 +267,7 @@ define <8 x i64> @v8i64_func_void() #0 {
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <16 x i64> @v16i64_func_void() #0 {
%ptr = load volatile <16 x i64> addrspace(1)*, <16 x i64> addrspace(1)* addrspace(2)* undef
%ptr = load volatile <16 x i64> addrspace(1)*, <16 x i64> addrspace(1)* addrspace(4)* undef
%val = load <16 x i64>, <16 x i64> addrspace(1)* %ptr
ret <16 x i64> %val
}
@ -309,7 +309,7 @@ define <4 x i16> @v4i16_func_void() #0 {
; GFX9: v_lshrrev_b32_e32 v1, 16, v0
; GCN: s_setpc_b64
define <5 x i16> @v5i16_func_void() #0 {
%ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(2)* undef
%ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(4)* undef
%val = load <5 x i16>, <5 x i16> addrspace(1)* %ptr
ret <5 x i16> %val
}
@ -319,7 +319,7 @@ define <5 x i16> @v5i16_func_void() #0 {
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <8 x i16> @v8i16_func_void() #0 {
%ptr = load volatile <8 x i16> addrspace(1)*, <8 x i16> addrspace(1)* addrspace(2)* undef
%ptr = load volatile <8 x i16> addrspace(1)*, <8 x i16> addrspace(1)* addrspace(4)* undef
%val = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
ret <8 x i16> %val
}
@ -330,7 +330,7 @@ define <8 x i16> @v8i16_func_void() #0 {
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <16 x i16> @v16i16_func_void() #0 {
%ptr = load volatile <16 x i16> addrspace(1)*, <16 x i16> addrspace(1)* addrspace(2)* undef
%ptr = load volatile <16 x i16> addrspace(1)*, <16 x i16> addrspace(1)* addrspace(4)* undef
%val = load <16 x i16>, <16 x i16> addrspace(1)* %ptr
ret <16 x i16> %val
}
@ -342,7 +342,7 @@ define <16 x i16> @v16i16_func_void() #0 {
; GCN-DAG: v14
; GCN-DAG: v15
define <16 x i8> @v16i8_func_void() #0 {
%ptr = load volatile <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(2)* undef
%ptr = load volatile <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef
%val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
ret <16 x i8> %val
}
@ -356,7 +356,7 @@ define <16 x i8> @v16i8_func_void() #0 {
; GFX89-DAG: v_lshrrev_b16_e32 v1, 8, v0
; GCN: s_setpc_b64
define <4 x i8> @v4i8_func_void() #0 {
%ptr = load volatile <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(2)* undef
%ptr = load volatile <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(4)* undef
%val = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
ret <4 x i8> %val
}
@ -427,7 +427,7 @@ define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret %arg0)
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <33 x i32> @v33i32_func_void() #0 {
%ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(2)* undef
%ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(4)* undef
%val = load <33 x i32>, <33 x i32> addrspace(1)* %ptr
ret <33 x i32> %val
}
@ -469,7 +469,7 @@ define <33 x i32> @v33i32_func_void() #0 {
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 {
%ptr = load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(2)* undef
%ptr = load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef
%val = load { <32 x i32>, i32 }, { <32 x i32>, i32 } addrspace(1)* %ptr
ret { <32 x i32>, i32 } %val
}
@ -511,7 +511,7 @@ define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 {
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 {
%ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(2)* undef
%ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(4)* undef
%val = load { i32, <32 x i32> }, { i32, <32 x i32> } addrspace(1)* %ptr
ret { i32, <32 x i32> } %val
}

View File

@ -1,9 +1,9 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NOHSA %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA %s
@private1 = private unnamed_addr addrspace(2) constant [4 x float] [float 0.0, float 1.0, float 2.0, float 3.0]
@private2 = private unnamed_addr addrspace(2) constant [4 x float] [float 4.0, float 5.0, float 6.0, float 7.0]
@available_externally = available_externally addrspace(2) global [256 x i32] zeroinitializer
@private1 = private unnamed_addr addrspace(4) constant [4 x float] [float 0.0, float 1.0, float 2.0, float 3.0]
@private2 = private unnamed_addr addrspace(4) constant [4 x float] [float 4.0, float 5.0, float 6.0, float 7.0]
@available_externally = available_externally addrspace(4) global [256 x i32] zeroinitializer
; GCN-LABEL: {{^}}private_test:
; GCN: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}}
@ -27,11 +27,11 @@
; HSA: s_addc_u32 s{{[0-9]+}}, s[[PC1_HI]], private2@rel32@hi+4
define amdgpu_kernel void @private_test(i32 %index, float addrspace(1)* %out) {
%ptr = getelementptr [4 x float], [4 x float] addrspace(2) * @private1, i32 0, i32 %index
%val = load float, float addrspace(2)* %ptr
%ptr = getelementptr [4 x float], [4 x float] addrspace(4) * @private1, i32 0, i32 %index
%val = load float, float addrspace(4)* %ptr
store volatile float %val, float addrspace(1)* %out
%ptr2 = getelementptr [4 x float], [4 x float] addrspace(2) * @private2, i32 0, i32 %index
%val2 = load float, float addrspace(2)* %ptr2
%ptr2 = getelementptr [4 x float], [4 x float] addrspace(4) * @private2, i32 0, i32 %index
%val2 = load float, float addrspace(4)* %ptr2
store volatile float %val2, float addrspace(1)* %out
ret void
}
@ -41,8 +41,8 @@ define amdgpu_kernel void @private_test(i32 %index, float addrspace(1)* %out) {
; HSA: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], available_externally@gotpcrel32@lo+4
; HSA: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], available_externally@gotpcrel32@hi+4
define amdgpu_kernel void @available_externally_test(i32 addrspace(1)* %out) {
%ptr = getelementptr [256 x i32], [256 x i32] addrspace(2)* @available_externally, i32 0, i32 1
%val = load i32, i32 addrspace(2)* %ptr
%ptr = getelementptr [256 x i32], [256 x i32] addrspace(4)* @available_externally, i32 0, i32 1
%val = load i32, i32 addrspace(4)* %ptr
store i32 %val, i32 addrspace(1)* %out
ret void
}

View File

@ -4,9 +4,9 @@
; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
@b = internal addrspace(2) constant [1 x i16] [ i16 7 ], align 2
@b = internal addrspace(4) constant [1 x i16] [ i16 7 ], align 2
@float_gv = internal unnamed_addr addrspace(2) constant [5 x float] [float 0.0, float 1.0, float 2.0, float 3.0, float 4.0], align 4
@float_gv = internal unnamed_addr addrspace(4) constant [5 x float] [float 0.0, float 1.0, float 2.0, float 3.0, float 4.0], align 4
; FUNC-LABEL: {{^}}float:
; GCN: s_load_dword
@ -17,13 +17,13 @@
; EG-NOT: MOV
define amdgpu_kernel void @float(float addrspace(1)* %out, i32 %index) {
entry:
%0 = getelementptr inbounds [5 x float], [5 x float] addrspace(2)* @float_gv, i32 0, i32 %index
%1 = load float, float addrspace(2)* %0
%0 = getelementptr inbounds [5 x float], [5 x float] addrspace(4)* @float_gv, i32 0, i32 %index
%1 = load float, float addrspace(4)* %0
store float %1, float addrspace(1)* %out
ret void
}
@i32_gv = internal unnamed_addr addrspace(2) constant [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4], align 4
@i32_gv = internal unnamed_addr addrspace(4) constant [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4], align 4
; FUNC-LABEL: {{^}}i32:
@ -35,8 +35,8 @@ entry:
; EG-NOT: MOV
define amdgpu_kernel void @i32(i32 addrspace(1)* %out, i32 %index) {
entry:
%0 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(2)* @i32_gv, i32 0, i32 %index
%1 = load i32, i32 addrspace(2)* %0
%0 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(4)* @i32_gv, i32 0, i32 %index
%1 = load i32, i32 addrspace(4)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
@ -44,7 +44,7 @@ entry:
%struct.foo = type { float, [5 x i32] }
@struct_foo_gv = internal unnamed_addr addrspace(2) constant [1 x %struct.foo] [ %struct.foo { float 16.0, [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4] } ]
@struct_foo_gv = internal unnamed_addr addrspace(4) constant [1 x %struct.foo] [ %struct.foo { float 16.0, [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4] } ]
; FUNC-LABEL: {{^}}struct_foo_gv_load:
; GCN: s_load_dword
@ -54,13 +54,13 @@ entry:
; EG-NOT: MOVA_INT
; EG-NOT: MOV
define amdgpu_kernel void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
%gep = getelementptr inbounds [1 x %struct.foo], [1 x %struct.foo] addrspace(2)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index
%load = load i32, i32 addrspace(2)* %gep, align 4
%gep = getelementptr inbounds [1 x %struct.foo], [1 x %struct.foo] addrspace(4)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index
%load = load i32, i32 addrspace(4)* %gep, align 4
store i32 %load, i32 addrspace(1)* %out, align 4
ret void
}
@array_v1_gv = internal addrspace(2) constant [4 x <1 x i32>] [ <1 x i32> <i32 1>,
@array_v1_gv = internal addrspace(4) constant [4 x <1 x i32>] [ <1 x i32> <i32 1>,
<1 x i32> <i32 2>,
<1 x i32> <i32 3>,
<1 x i32> <i32 4> ]
@ -73,8 +73,8 @@ define amdgpu_kernel void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index
; EG-NOT: MOVA_INT
; EG-NOT: MOV
define amdgpu_kernel void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) {
%gep = getelementptr inbounds [4 x <1 x i32>], [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index
%load = load <1 x i32>, <1 x i32> addrspace(2)* %gep, align 4
%gep = getelementptr inbounds [4 x <1 x i32>], [4 x <1 x i32>] addrspace(4)* @array_v1_gv, i32 0, i32 %index
%load = load <1 x i32>, <1 x i32> addrspace(4)* %gep, align 4
store <1 x i32> %load, <1 x i32> addrspace(1)* %out, align 4
ret void
}
@ -90,8 +90,8 @@ entry:
br i1 %0, label %if, label %else
if:
%1 = getelementptr inbounds [5 x float], [5 x float] addrspace(2)* @float_gv, i32 0, i32 %index
%2 = load float, float addrspace(2)* %1
%1 = getelementptr inbounds [5 x float], [5 x float] addrspace(4)* @float_gv, i32 0, i32 %index
%2 = load float, float addrspace(4)* %1
store float %2, float addrspace(1)* %out
br label %endif

View File

@ -10,9 +10,9 @@
; HSA: .globl simple_align16
; HSA: .p2align 5
define void @simple_align16(i32 addrspace(1)* addrspace(2)* %ptr.out) align 32 {
define void @simple_align16(i32 addrspace(1)* addrspace(4)* %ptr.out) align 32 {
entry:
%out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %ptr.out
%out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %ptr.out
store i32 0, i32 addrspace(1)* %out
ret void
}

View File

@ -51,9 +51,9 @@
; HSA: .size simple, .Lfunc_end0-simple
; HSA: ; Function info:
; HSA-NOT: COMPUTE_PGM_RSRC2
define void @simple(i32 addrspace(1)* addrspace(2)* %ptr.out) {
define void @simple(i32 addrspace(1)* addrspace(4)* %ptr.out) {
entry:
%out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %ptr.out
%out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %ptr.out
store i32 0, i32 addrspace(1)* %out
ret void
}
@ -61,9 +61,9 @@ entry:
; Ignore explicit alignment that is too low.
; HSA: .globl simple_align2
; HSA: .p2align 2
define void @simple_align2(i32 addrspace(1)* addrspace(2)* %ptr.out) align 2 {
define void @simple_align2(i32 addrspace(1)* addrspace(4)* %ptr.out) align 2 {
entry:
%out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %ptr.out
%out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %ptr.out
store i32 0, i32 addrspace(1)* %out
ret void
}

View File

@ -581,7 +581,7 @@ define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c)
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g,
i32 addrspace(2)* %c,
i32 addrspace(4)* %c,
i32 addrspace(3)* %l)
!kernel_arg_addr_space !50 !kernel_arg_access_qual !23 !kernel_arg_type !51
!kernel_arg_base_type !51 !kernel_arg_type_qual !25 {

View File

@ -20,21 +20,21 @@ define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg %arg, i32 inreg %arg1
%.0.vec.insert = insertelement <2 x i32> undef, i32 %arg2, i32 0
%.4.vec.insert = shufflevector <2 x i32> %.0.vec.insert, <2 x i32> %tmp6, <2 x i32> <i32 0, i32 3>
%tmp7 = bitcast <2 x i32> %.4.vec.insert to i64
%tmp8 = inttoptr i64 %tmp7 to [4294967295 x i8] addrspace(2)*
%tmp8 = inttoptr i64 %tmp7 to [4294967295 x i8] addrspace(4)*
%tmp9 = add <3 x i32> %arg3, %arg5
%tmp10 = getelementptr [4294967295 x i8], [4294967295 x i8] addrspace(2)* %tmp8, i64 0, i64 32
%tmp11 = bitcast i8 addrspace(2)* %tmp10 to <8 x i32> addrspace(2)*, !amdgpu.uniform !0
%tmp12 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp11, align 16
%tmp10 = getelementptr [4294967295 x i8], [4294967295 x i8] addrspace(4)* %tmp8, i64 0, i64 32
%tmp11 = bitcast i8 addrspace(4)* %tmp10 to <8 x i32> addrspace(4)*, !amdgpu.uniform !0
%tmp12 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp11, align 16
%tmp13 = shufflevector <3 x i32> %tmp9, <3 x i32> undef, <2 x i32> <i32 0, i32 1>
%tmp14 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %tmp13, <8 x i32> %tmp12, i32 15, i1 false, i1 false, i1 false, i1 false) #0
%tmp15 = inttoptr i64 %tmp7 to <8 x i32> addrspace(2)*
%tmp16 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp15, align 16
%tmp15 = inttoptr i64 %tmp7 to <8 x i32> addrspace(4)*
%tmp16 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp15, align 16
call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %tmp14, <2 x i32> %tmp13, <8 x i32> %tmp16, i32 15, i1 false, i1 false, i1 false, i1 false) #0
%tmp17 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp15, align 16
%tmp17 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp15, align 16
%tmp18 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %tmp13, <8 x i32> %tmp17, i32 15, i1 false, i1 false, i1 false, i1 false) #0
%tmp19 = getelementptr [4294967295 x i8], [4294967295 x i8] addrspace(2)* %tmp8, i64 0, i64 64
%tmp20 = bitcast i8 addrspace(2)* %tmp19 to <8 x i32> addrspace(2)*, !amdgpu.uniform !0
%tmp21 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp20, align 16
%tmp19 = getelementptr [4294967295 x i8], [4294967295 x i8] addrspace(4)* %tmp8, i64 0, i64 64
%tmp20 = bitcast i8 addrspace(4)* %tmp19 to <8 x i32> addrspace(4)*, !amdgpu.uniform !0
%tmp21 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp20, align 16
call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %tmp18, <2 x i32> %tmp13, <8 x i32> %tmp21, i32 15, i1 false, i1 false, i1 false, i1 false) #0
ret void
}

View File

@ -10,8 +10,8 @@
; GFX9-NOT: lshr
; GFX9: s_pack_lh_b32_b16 s{{[0-9]+}}, 0x3e7, [[VEC]]
define amdgpu_kernel void @s_insertelement_v2i16_0(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr) #0 {
%vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
define amdgpu_kernel void @s_insertelement_v2i16_0(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr) #0 {
%vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
%vecins = insertelement <2 x i16> %vec, i16 999, i32 0
store <2 x i16> %vecins, <2 x i16> addrspace(1)* %out
ret void
@ -28,8 +28,8 @@ define amdgpu_kernel void @s_insertelement_v2i16_0(<2 x i16> addrspace(1)* %out,
; GFX9-NOT: [[ELT0]]
; GFX9-NOT: [[VEC]]
; GFX9: s_pack_lh_b32_b16 s{{[0-9]+}}, [[ELT0]], [[VEC]]
define amdgpu_kernel void @s_insertelement_v2i16_0_reg(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr, i16 %elt) #0 {
%vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
define amdgpu_kernel void @s_insertelement_v2i16_0_reg(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr, i16 %elt) #0 {
%vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
%vecins = insertelement <2 x i16> %vec, i16 %elt, i32 0
store <2 x i16> %vecins, <2 x i16> addrspace(1)* %out
ret void
@ -48,8 +48,8 @@ define amdgpu_kernel void @s_insertelement_v2i16_0_reg(<2 x i16> addrspace(1)* %
; GFX9: s_lshr_b32 [[ELT1:s[0-9]+]], [[VEC]], 16
; GFX9-DAG: s_pack_ll_b32_b16 s{{[0-9]+}}, [[ELT0]], [[ELT1]]
; GFX9-DAG: ; use [[ELT1]]
define amdgpu_kernel void @s_insertelement_v2i16_0_multi_use_hi_reg(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr, i16 %elt) #0 {
%vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
define amdgpu_kernel void @s_insertelement_v2i16_0_multi_use_hi_reg(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr, i16 %elt) #0 {
%vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
%elt1 = extractelement <2 x i16> %vec, i32 1
%vecins = insertelement <2 x i16> %vec, i16 %elt, i32 0
store <2 x i16> %vecins, <2 x i16> addrspace(1)* %out
@ -68,8 +68,8 @@ define amdgpu_kernel void @s_insertelement_v2i16_0_multi_use_hi_reg(<2 x i16> ad
; GFX9-NOT: [[ELT0]]
; GFX9-NOT: [[VEC]]
; GFX9: s_pack_hh_b32_b16 s{{[0-9]+}}, [[ELT_ARG]], [[VEC]]
define amdgpu_kernel void @s_insertelement_v2i16_0_reghi(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr, i32 %elt.arg) #0 {
%vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
define amdgpu_kernel void @s_insertelement_v2i16_0_reghi(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr, i32 %elt.arg) #0 {
%vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
%elt.hi = lshr i32 %elt.arg, 16
%elt = trunc i32 %elt.hi to i16
%vecins = insertelement <2 x i16> %vec, i16 %elt, i32 0
@ -88,8 +88,8 @@ define amdgpu_kernel void @s_insertelement_v2i16_0_reghi(<2 x i16> addrspace(1)*
; GFX9: s_lshr_b32 [[ELT1:s[0-9]+]], [[ELT_ARG]], 16
; GFX9: s_pack_lh_b32_b16 s{{[0-9]+}}, [[ELT1]], [[VEC]]
; GFX9: ; use [[ELT1]]
define amdgpu_kernel void @s_insertelement_v2i16_0_reghi_multi_use_1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr, i32 %elt.arg) #0 {
%vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
define amdgpu_kernel void @s_insertelement_v2i16_0_reghi_multi_use_1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr, i32 %elt.arg) #0 {
%vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
%elt.hi = lshr i32 %elt.arg, 16
%elt = trunc i32 %elt.hi to i16
%vecins = insertelement <2 x i16> %vec, i16 %elt, i32 0
@ -113,8 +113,8 @@ define amdgpu_kernel void @s_insertelement_v2i16_0_reghi_multi_use_1(<2 x i16> a
; GFX9: s_pack_ll_b32_b16 s{{[0-9]+}}, [[ELT_HI]], [[VEC_HI]]
; GFX9: ; use [[ELT_HI]]
; GFX9: ; use [[VEC_HI]]
define amdgpu_kernel void @s_insertelement_v2i16_0_reghi_both_multi_use_1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr, i32 %elt.arg) #0 {
%vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
define amdgpu_kernel void @s_insertelement_v2i16_0_reghi_both_multi_use_1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr, i32 %elt.arg) #0 {
%vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
%elt.hi = lshr i32 %elt.arg, 16
%elt = trunc i32 %elt.hi to i16
%vec.hi = extractelement <2 x i16> %vec, i32 1
@ -137,8 +137,8 @@ define amdgpu_kernel void @s_insertelement_v2i16_0_reghi_both_multi_use_1(<2 x i
; CIVI: s_or_b32 [[INS:s[0-9]+]], [[ELT0]], 0x3e70000
; GFX9: s_pack_ll_b32_b16 s{{[0-9]+}}, [[VEC]], 0x3e7
define amdgpu_kernel void @s_insertelement_v2i16_1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr) #0 {
%vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
define amdgpu_kernel void @s_insertelement_v2i16_1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr) #0 {
%vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
%vecins = insertelement <2 x i16> %vec, i16 999, i32 1
store <2 x i16> %vecins, <2 x i16> addrspace(1)* %out
ret void
@ -153,8 +153,8 @@ define amdgpu_kernel void @s_insertelement_v2i16_1(<2 x i16> addrspace(1)* %out,
; GCN-NOT: shlr
; GFX9: s_pack_ll_b32_b16 s{{[0-9]+}}, [[VEC]], [[ELT1]]
define amdgpu_kernel void @s_insertelement_v2i16_1_reg(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr, i16 %elt) #0 {
%vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
define amdgpu_kernel void @s_insertelement_v2i16_1_reg(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr, i16 %elt) #0 {
%vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
%vecins = insertelement <2 x i16> %vec, i16 %elt, i32 1
store <2 x i16> %vecins, <2 x i16> addrspace(1)* %out
ret void
@ -167,8 +167,8 @@ define amdgpu_kernel void @s_insertelement_v2i16_1_reg(<2 x i16> addrspace(1)* %
; GFX9: s_lshr_b32 [[ELT1:s[0-9]+]], [[VEC]], 16
; GFX9: s_pack_ll_b32_b16 s{{[0-9]+}}, 0x4500, [[ELT1]]
define amdgpu_kernel void @s_insertelement_v2f16_0(<2 x half> addrspace(1)* %out, <2 x half> addrspace(2)* %vec.ptr) #0 {
%vec = load <2 x half>, <2 x half> addrspace(2)* %vec.ptr
define amdgpu_kernel void @s_insertelement_v2f16_0(<2 x half> addrspace(1)* %out, <2 x half> addrspace(4)* %vec.ptr) #0 {
%vec = load <2 x half>, <2 x half> addrspace(4)* %vec.ptr
%vecins = insertelement <2 x half> %vec, half 5.000000e+00, i32 0
store <2 x half> %vecins, <2 x half> addrspace(1)* %out
ret void
@ -182,8 +182,8 @@ define amdgpu_kernel void @s_insertelement_v2f16_0(<2 x half> addrspace(1)* %out
; CIVI: s_or_b32 [[INS:s[0-9]+]], [[ELT0]], 0x45000000
; GFX9: s_pack_ll_b32_b16 s{{[0-9]+}}, [[VEC]], 0x4500
define amdgpu_kernel void @s_insertelement_v2f16_1(<2 x half> addrspace(1)* %out, <2 x half> addrspace(2)* %vec.ptr) #0 {
%vec = load <2 x half>, <2 x half> addrspace(2)* %vec.ptr
define amdgpu_kernel void @s_insertelement_v2f16_1(<2 x half> addrspace(1)* %out, <2 x half> addrspace(4)* %vec.ptr) #0 {
%vec = load <2 x half>, <2 x half> addrspace(4)* %vec.ptr
%vecins = insertelement <2 x half> %vec, half 5.000000e+00, i32 1
store <2 x half> %vecins, <2 x half> addrspace(1)* %out
ret void
@ -399,9 +399,9 @@ define amdgpu_kernel void @v_insertelement_v2f16_1_inlineimm(<2 x half> addrspac
; GCN-DAG: s_lshl_b32 [[MASK:s[0-9]+]], 0xffff, [[SCALED_IDX]]
; GCN: v_bfi_b32 [[RESULT:v[0-9]+]], [[MASK]], [[K]], [[VVEC]]
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @s_insertelement_v2i16_dynamic(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr, i32 addrspace(2)* %idx.ptr) #0 {
%idx = load volatile i32, i32 addrspace(2)* %idx.ptr
%vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
define amdgpu_kernel void @s_insertelement_v2i16_dynamic(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr, i32 addrspace(4)* %idx.ptr) #0 {
%idx = load volatile i32, i32 addrspace(4)* %idx.ptr
%vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
%vecins = insertelement <2 x i16> %vec, i16 999, i32 %idx
store <2 x i16> %vecins, <2 x i16> addrspace(1)* %out
ret void

View File

@ -22,8 +22,8 @@ define amdgpu_kernel void @test_merge_store_constant_i16_invariant_global_pointe
; GCN: s_load_dwordx2 s{{\[}}[[SPTR_LO:[0-9]+]]:[[SPTR_HI:[0-9]+]]{{\]}}
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x1c8007b
; GCN: buffer_store_dword [[K]], off, s{{\[}}[[SPTR_LO]]:
define amdgpu_kernel void @test_merge_store_constant_i16_invariant_constant_pointer_load(i16 addrspace(1)* addrspace(2)* dereferenceable(4096) nonnull %in) #0 {
%ptr = load i16 addrspace(1)*, i16 addrspace(1)* addrspace(2)* %in, !invariant.load !0
define amdgpu_kernel void @test_merge_store_constant_i16_invariant_constant_pointer_load(i16 addrspace(1)* addrspace(4)* dereferenceable(4096) nonnull %in) #0 {
%ptr = load i16 addrspace(1)*, i16 addrspace(1)* addrspace(4)* %in, !invariant.load !0
%ptr.1 = getelementptr i16, i16 addrspace(1)* %ptr, i64 1
store i16 123, i16 addrspace(1)* %ptr, align 4
store i16 456, i16 addrspace(1)* %ptr.1

View File

@ -14,10 +14,10 @@
; CHECK: s_movk_i32 [[K:s[0-9]+]], 0x4d2 ; encoding
; CHECK: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, [[K]] idxen offen offset:65535 glc slc
define amdgpu_vs void @main([17 x <4 x i32>] addrspace(2)* byval %arg, [32 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <32 x i8>] addrspace(2)* byval %arg2, [2 x <4 x i32>] addrspace(2)* byval %arg3, [17 x <4 x i32>] addrspace(2)* inreg %arg4, [17 x <4 x i32>] addrspace(2)* inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9) {
define amdgpu_vs void @main([17 x <4 x i32>] addrspace(4)* byval %arg, [32 x <4 x i32>] addrspace(4)* byval %arg1, [16 x <32 x i8>] addrspace(4)* byval %arg2, [2 x <4 x i32>] addrspace(4)* byval %arg3, [17 x <4 x i32>] addrspace(4)* inreg %arg4, [17 x <4 x i32>] addrspace(4)* inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9) {
main_body:
%tmp = getelementptr [2 x <4 x i32>], [2 x <4 x i32>] addrspace(2)* %arg3, i64 0, i32 1
%tmp10 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
%tmp = getelementptr [2 x <4 x i32>], [2 x <4 x i32>] addrspace(4)* %arg3, i64 0, i32 1
%tmp10 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0
%tmp11 = shl i32 %arg6, 2
%tmp12 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32> %tmp10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0)
%tmp13 = bitcast i32 %tmp12 to float

View File

@ -7,13 +7,13 @@
; GCN: enable_sgpr_dispatch_ptr = 1
; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
define amdgpu_kernel void @test(i32 addrspace(1)* %out) {
%dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
%header_ptr = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
%value = load i32, i32 addrspace(2)* %header_ptr
%dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
%header_ptr = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
%value = load i32, i32 addrspace(4)* %header_ptr
store i32 %value, i32 addrspace(1)* %out
ret void
}
declare noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
attributes #0 = { readnone }

View File

@ -2,23 +2,23 @@
; ERROR: in function test_kernel{{.*}}: non-hsa intrinsic with hsa target
define amdgpu_kernel void @test_kernel(i32 addrspace(1)* %out) #1 {
%implicit_buffer_ptr = call i8 addrspace(2)* @llvm.amdgcn.implicit.buffer.ptr()
%header_ptr = bitcast i8 addrspace(2)* %implicit_buffer_ptr to i32 addrspace(2)*
%value = load i32, i32 addrspace(2)* %header_ptr
%implicit_buffer_ptr = call i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr()
%header_ptr = bitcast i8 addrspace(4)* %implicit_buffer_ptr to i32 addrspace(4)*
%value = load i32, i32 addrspace(4)* %header_ptr
store i32 %value, i32 addrspace(1)* %out
ret void
}
; ERROR: in function test_func{{.*}}: non-hsa intrinsic with hsa target
define void @test_func(i32 addrspace(1)* %out) #1 {
%implicit_buffer_ptr = call i8 addrspace(2)* @llvm.amdgcn.implicit.buffer.ptr()
%header_ptr = bitcast i8 addrspace(2)* %implicit_buffer_ptr to i32 addrspace(2)*
%value = load i32, i32 addrspace(2)* %header_ptr
%implicit_buffer_ptr = call i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr()
%header_ptr = bitcast i8 addrspace(4)* %implicit_buffer_ptr to i32 addrspace(4)*
%value = load i32, i32 addrspace(4)* %header_ptr
store i32 %value, i32 addrspace(1)* %out
ret void
}
declare i8 addrspace(2)* @llvm.amdgcn.implicit.buffer.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr() #0
attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind }

View File

@ -10,9 +10,9 @@
define amdgpu_ps i32 @test_ps() #1 {
%alloca = alloca i32, addrspace(5)
store volatile i32 0, i32 addrspace(5)* %alloca
%implicit_buffer_ptr = call i8 addrspace(2)* @llvm.amdgcn.implicit.buffer.ptr()
%buffer_ptr = bitcast i8 addrspace(2)* %implicit_buffer_ptr to i32 addrspace(2)*
%value = load volatile i32, i32 addrspace(2)* %buffer_ptr
%implicit_buffer_ptr = call i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr()
%buffer_ptr = bitcast i8 addrspace(4)* %implicit_buffer_ptr to i32 addrspace(4)*
%value = load volatile i32, i32 addrspace(4)* %buffer_ptr
ret i32 %value
}
@ -23,13 +23,13 @@ define amdgpu_ps i32 @test_ps() #1 {
define amdgpu_cs i32 @test_cs() #1 {
%alloca = alloca i32, addrspace(5)
store volatile i32 0, i32 addrspace(5)* %alloca
%implicit_buffer_ptr = call i8 addrspace(2)* @llvm.amdgcn.implicit.buffer.ptr()
%buffer_ptr = bitcast i8 addrspace(2)* %implicit_buffer_ptr to i32 addrspace(2)*
%value = load volatile i32, i32 addrspace(2)* %buffer_ptr
%implicit_buffer_ptr = call i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr()
%buffer_ptr = bitcast i8 addrspace(4)* %implicit_buffer_ptr to i32 addrspace(4)*
%value = load volatile i32, i32 addrspace(4)* %buffer_ptr
ret i32 %value
}
declare i8 addrspace(2)* @llvm.amdgcn.implicit.buffer.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr() #0
attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind }

View File

@ -11,9 +11,9 @@
; HSA: s_load_dword s0, s[4:5], 0x0
define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
%implicitarg.ptr = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
%cast = bitcast i8 addrspace(2)* %implicitarg.ptr to i32 addrspace(2)*
%load = load volatile i32, i32 addrspace(2)* %cast
%implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
%load = load volatile i32, i32 addrspace(4)* %cast
ret void
}
@ -26,9 +26,9 @@ define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
; HSA: s_load_dword s0, s[4:5], 0x1c
define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 {
%implicitarg.ptr = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
%cast = bitcast i8 addrspace(2)* %implicitarg.ptr to i32 addrspace(2)*
%load = load volatile i32, i32 addrspace(2)* %cast
%implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
%load = load volatile i32, i32 addrspace(4)* %cast
ret void
}
@ -38,9 +38,9 @@ define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 {
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @func_implicitarg_ptr() #1 {
%implicitarg.ptr = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
%cast = bitcast i8 addrspace(2)* %implicitarg.ptr to i32 addrspace(2)*
%load = load volatile i32, i32 addrspace(2)* %cast
%implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
%load = load volatile i32, i32 addrspace(4)* %cast
ret void
}
@ -86,12 +86,12 @@ define void @func_call_implicitarg_ptr_func() #1 {
; GCN: s_load_dword s{{[0-9]+}}, s[6:7], 0x0{{$}}
; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0{{$}}
define void @func_kernarg_implicitarg_ptr() #1 {
%kernarg.segment.ptr = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
%implicitarg.ptr = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
%cast.kernarg.segment.ptr = bitcast i8 addrspace(2)* %kernarg.segment.ptr to i32 addrspace(2)*
%cast.implicitarg = bitcast i8 addrspace(2)* %implicitarg.ptr to i32 addrspace(2)*
%load0 = load volatile i32, i32 addrspace(2)* %cast.kernarg.segment.ptr
%load1 = load volatile i32, i32 addrspace(2)* %cast.implicitarg
%kernarg.segment.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
%implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%cast.kernarg.segment.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)*
%cast.implicitarg = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
%load0 = load volatile i32, i32 addrspace(4)* %cast.kernarg.segment.ptr
%load1 = load volatile i32, i32 addrspace(4)* %cast.implicitarg
ret void
}
@ -106,8 +106,8 @@ define amdgpu_kernel void @kernel_call_kernarg_implicitarg_ptr_func([112 x i8])
ret void
}
declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() #2
declare i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #2
declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #2
declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #2
attributes #0 = { nounwind noinline }
attributes #1 = { nounwind noinline }

View File

@ -11,10 +11,10 @@
; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[0:1], 0xa
define amdgpu_kernel void @test(i32 addrspace(1)* %out) #1 {
%kernarg.segment.ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
%header.ptr = bitcast i8 addrspace(2)* %kernarg.segment.ptr to i32 addrspace(2)*
%gep = getelementptr i32, i32 addrspace(2)* %header.ptr, i64 10
%value = load i32, i32 addrspace(2)* %gep
%kernarg.segment.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
%header.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)*
%gep = getelementptr i32, i32 addrspace(4)* %header.ptr, i64 10
%value = load i32, i32 addrspace(4)* %gep
store i32 %value, i32 addrspace(1)* %out
ret void
}
@ -23,10 +23,10 @@ define amdgpu_kernel void @test(i32 addrspace(1)* %out) #1 {
; 10 + 9 (36 prepended implicit bytes) + 2 (out pointer) = 21 = 0x15
; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[0:1], 0x15
define amdgpu_kernel void @test_implicit(i32 addrspace(1)* %out) #1 {
%implicitarg.ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
%header.ptr = bitcast i8 addrspace(2)* %implicitarg.ptr to i32 addrspace(2)*
%gep = getelementptr i32, i32 addrspace(2)* %header.ptr, i64 10
%value = load i32, i32 addrspace(2)* %gep
%implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%header.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
%gep = getelementptr i32, i32 addrspace(4)* %header.ptr, i64 10
%value = load i32, i32 addrspace(4)* %gep
store i32 %value, i32 addrspace(1)* %out
ret void
}
@ -42,9 +42,9 @@ define amdgpu_kernel void @test_implicit(i32 addrspace(1)* %out) #1 {
; MESA: buffer_store_dword [[V_VAL]]
; HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]]
define amdgpu_kernel void @test_implicit_alignment(i32 addrspace(1)* %out, <2 x i8> %in) #1 {
%implicitarg.ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
%arg.ptr = bitcast i8 addrspace(2)* %implicitarg.ptr to i32 addrspace(2)*
%val = load i32, i32 addrspace(2)* %arg.ptr
%implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%arg.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
%val = load i32, i32 addrspace(4)* %arg.ptr
store i32 %val, i32 addrspace(1)* %out
ret void
}
@ -53,16 +53,16 @@ define amdgpu_kernel void @test_implicit_alignment(i32 addrspace(1)* %out, <2 x
; HSA: enable_sgpr_kernarg_segment_ptr = 1
; HSA: s_load_dword s{{[0-9]+}}, s[4:5]
define amdgpu_kernel void @test_no_kernargs() #1 {
%kernarg.segment.ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
%header.ptr = bitcast i8 addrspace(2)* %kernarg.segment.ptr to i32 addrspace(2)*
%gep = getelementptr i32, i32 addrspace(2)* %header.ptr, i64 10
%value = load i32, i32 addrspace(2)* %gep
%kernarg.segment.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
%header.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)*
%gep = getelementptr i32, i32 addrspace(4)* %header.ptr, i64 10
%value = load i32, i32 addrspace(4)* %gep
store volatile i32 %value, i32 addrspace(1)* undef
ret void
}
declare i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }

View File

@ -7,13 +7,13 @@
; GCN: enable_sgpr_queue_ptr = 1
; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
define amdgpu_kernel void @test(i32 addrspace(1)* %out) {
%queue_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
%header_ptr = bitcast i8 addrspace(2)* %queue_ptr to i32 addrspace(2)*
%value = load i32, i32 addrspace(2)* %header_ptr
%queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
%header_ptr = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
%value = load i32, i32 addrspace(4)* %header_ptr
store i32 %value, i32 addrspace(1)* %out
ret void
}
declare noalias i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
attributes #0 = { nounwind readnone }

View File

@ -3,7 +3,7 @@
declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i1) nounwind
declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i1) nounwind
declare void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(2)* nocapture, i64, i1) nounwind
declare void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(4)* nocapture, i64, i1) nounwind
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align1:
@ -328,8 +328,8 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align16(i64 ad
}
; Test shouldConvertConstantLoadToIntImm
@hello.align4 = private unnamed_addr addrspace(2) constant [16 x i8] c"constant string\00", align 4
@hello.align1 = private unnamed_addr addrspace(2) constant [16 x i8] c"constant string\00", align 1
@hello.align4 = private unnamed_addr addrspace(4) constant [16 x i8] c"constant string\00", align 4
@hello.align1 = private unnamed_addr addrspace(4) constant [16 x i8] c"constant string\00", align 1
; FUNC-LABEL: {{^}}test_memcpy_const_string_align4:
; SI: s_getpc_b64
@ -341,8 +341,8 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align16(i64 ad
; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4
define amdgpu_kernel void @test_memcpy_const_string_align4(i8 addrspace(1)* noalias %out) nounwind {
%str = bitcast [16 x i8] addrspace(2)* @hello.align4 to i8 addrspace(2)*
call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* align 4 %out, i8 addrspace(2)* align 4 %str, i64 32, i1 false)
%str = bitcast [16 x i8] addrspace(4)* @hello.align4 to i8 addrspace(4)*
call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* align 4 %out, i8 addrspace(4)* align 4 %str, i64 32, i1 false)
ret void
}
@ -366,7 +366,7 @@ define amdgpu_kernel void @test_memcpy_const_string_align4(i8 addrspace(1)* noal
; SI: buffer_store_byte
; SI: buffer_store_byte
define amdgpu_kernel void @test_memcpy_const_string_align1(i8 addrspace(1)* noalias %out) nounwind {
%str = bitcast [16 x i8] addrspace(2)* @hello.align1 to i8 addrspace(2)*
call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* %out, i8 addrspace(2)* %str, i64 32, i1 false)
%str = bitcast [16 x i8] addrspace(4)* @hello.align1 to i8 addrspace(4)*
call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* %out, i8 addrspace(4)* %str, i64 32, i1 false)
ret void
}

View File

@ -6,8 +6,8 @@
; GCN: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}]
; GCN-NOHSA: buffer_store_dwordx2
; GCN-HSA: flat_store_dwordx2
define amdgpu_kernel void @constant_load_f64(double addrspace(1)* %out, double addrspace(2)* %in) #0 {
%ld = load double, double addrspace(2)* %in
define amdgpu_kernel void @constant_load_f64(double addrspace(1)* %out, double addrspace(4)* %in) #0 {
%ld = load double, double addrspace(4)* %in
store double %ld, double addrspace(1)* %out
ret void
}

View File

@ -9,57 +9,57 @@
; EG: VTX_READ_8
; EG: AND_INT
define amdgpu_kernel void @constant_load_i1(i1 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 {
%load = load i1, i1 addrspace(2)* %in
define amdgpu_kernel void @constant_load_i1(i1 addrspace(1)* %out, i1 addrspace(4)* nocapture %in) #0 {
%load = load i1, i1 addrspace(4)* %in
store i1 %load, i1 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_load_v2i1:
define amdgpu_kernel void @constant_load_v2i1(<2 x i1> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <2 x i1>, <2 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_load_v2i1(<2 x i1> addrspace(1)* %out, <2 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <2 x i1>, <2 x i1> addrspace(4)* %in
store <2 x i1> %load, <2 x i1> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_load_v3i1:
define amdgpu_kernel void @constant_load_v3i1(<3 x i1> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <3 x i1>, <3 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_load_v3i1(<3 x i1> addrspace(1)* %out, <3 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <3 x i1>, <3 x i1> addrspace(4)* %in
store <3 x i1> %load, <3 x i1> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_load_v4i1:
define amdgpu_kernel void @constant_load_v4i1(<4 x i1> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <4 x i1>, <4 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_load_v4i1(<4 x i1> addrspace(1)* %out, <4 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <4 x i1>, <4 x i1> addrspace(4)* %in
store <4 x i1> %load, <4 x i1> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_load_v8i1:
define amdgpu_kernel void @constant_load_v8i1(<8 x i1> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <8 x i1>, <8 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_load_v8i1(<8 x i1> addrspace(1)* %out, <8 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <8 x i1>, <8 x i1> addrspace(4)* %in
store <8 x i1> %load, <8 x i1> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_load_v16i1:
define amdgpu_kernel void @constant_load_v16i1(<16 x i1> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <16 x i1>, <16 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_load_v16i1(<16 x i1> addrspace(1)* %out, <16 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <16 x i1>, <16 x i1> addrspace(4)* %in
store <16 x i1> %load, <16 x i1> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_load_v32i1:
define amdgpu_kernel void @constant_load_v32i1(<32 x i1> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <32 x i1>, <32 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_load_v32i1(<32 x i1> addrspace(1)* %out, <32 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <32 x i1>, <32 x i1> addrspace(4)* %in
store <32 x i1> %load, <32 x i1> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_load_v64i1:
define amdgpu_kernel void @constant_load_v64i1(<64 x i1> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <64 x i1>, <64 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_load_v64i1(<64 x i1> addrspace(1)* %out, <64 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <64 x i1>, <64 x i1> addrspace(4)* %in
store <64 x i1> %load, <64 x i1> addrspace(1)* %out
ret void
}
@ -67,8 +67,8 @@ define amdgpu_kernel void @constant_load_v64i1(<64 x i1> addrspace(1)* %out, <64
; FUNC-LABEL: {{^}}constant_zextload_i1_to_i32:
; GCN: buffer_load_ubyte
; GCN: buffer_store_dword
define amdgpu_kernel void @constant_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 {
%a = load i1, i1 addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(4)* nocapture %in) #0 {
%a = load i1, i1 addrspace(4)* %in
%ext = zext i1 %a to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
@ -81,136 +81,136 @@ define amdgpu_kernel void @constant_zextload_i1_to_i32(i32 addrspace(1)* %out, i
; EG: VTX_READ_8
; EG: BFE_INT
define amdgpu_kernel void @constant_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 {
%a = load i1, i1 addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(4)* nocapture %in) #0 {
%a = load i1, i1 addrspace(4)* %in
%ext = sext i1 %a to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v1i1_to_v1i32:
define amdgpu_kernel void @constant_zextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <1 x i1>, <1 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <1 x i1>, <1 x i1> addrspace(4)* %in
%ext = zext <1 x i1> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_sextload_v1i1_to_v1i32:
define amdgpu_kernel void @constant_sextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <1 x i1>, <1 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <1 x i1>, <1 x i1> addrspace(4)* %in
%ext = sext <1 x i1> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v2i1_to_v2i32:
define amdgpu_kernel void @constant_zextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <2 x i1>, <2 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <2 x i1>, <2 x i1> addrspace(4)* %in
%ext = zext <2 x i1> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_sextload_v2i1_to_v2i32:
define amdgpu_kernel void @constant_sextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <2 x i1>, <2 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <2 x i1>, <2 x i1> addrspace(4)* %in
%ext = sext <2 x i1> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v3i1_to_v3i32:
define amdgpu_kernel void @constant_zextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <3 x i1>, <3 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <3 x i1>, <3 x i1> addrspace(4)* %in
%ext = zext <3 x i1> %load to <3 x i32>
store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_sextload_v3i1_to_v3i32:
define amdgpu_kernel void @constant_sextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <3 x i1>, <3 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <3 x i1>, <3 x i1> addrspace(4)* %in
%ext = sext <3 x i1> %load to <3 x i32>
store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v4i1_to_v4i32:
define amdgpu_kernel void @constant_zextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <4 x i1>, <4 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <4 x i1>, <4 x i1> addrspace(4)* %in
%ext = zext <4 x i1> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_sextload_v4i1_to_v4i32:
define amdgpu_kernel void @constant_sextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <4 x i1>, <4 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <4 x i1>, <4 x i1> addrspace(4)* %in
%ext = sext <4 x i1> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v8i1_to_v8i32:
define amdgpu_kernel void @constant_zextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <8 x i1>, <8 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <8 x i1>, <8 x i1> addrspace(4)* %in
%ext = zext <8 x i1> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_sextload_v8i1_to_v8i32:
define amdgpu_kernel void @constant_sextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <8 x i1>, <8 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <8 x i1>, <8 x i1> addrspace(4)* %in
%ext = sext <8 x i1> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v16i1_to_v16i32:
define amdgpu_kernel void @constant_zextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <16 x i1>, <16 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <16 x i1>, <16 x i1> addrspace(4)* %in
%ext = zext <16 x i1> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_sextload_v16i1_to_v16i32:
define amdgpu_kernel void @constant_sextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <16 x i1>, <16 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <16 x i1>, <16 x i1> addrspace(4)* %in
%ext = sext <16 x i1> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v32i1_to_v32i32:
define amdgpu_kernel void @constant_zextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <32 x i1>, <32 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <32 x i1>, <32 x i1> addrspace(4)* %in
%ext = zext <32 x i1> %load to <32 x i32>
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_sextload_v32i1_to_v32i32:
define amdgpu_kernel void @constant_sextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <32 x i1>, <32 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <32 x i1>, <32 x i1> addrspace(4)* %in
%ext = sext <32 x i1> %load to <32 x i32>
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v64i1_to_v64i32:
define amdgpu_kernel void @constant_zextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <64 x i1>, <64 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <64 x i1>, <64 x i1> addrspace(4)* %in
%ext = zext <64 x i1> %load to <64 x i32>
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_sextload_v64i1_to_v64i32:
define amdgpu_kernel void @constant_sextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <64 x i1>, <64 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <64 x i1>, <64 x i1> addrspace(4)* %in
%ext = sext <64 x i1> %load to <64 x i32>
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
ret void
@ -221,8 +221,8 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i32(<64 x i32> addrspac
; GCN-DAG: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
; GCN-DAG: v_and_b32_e32 {{v[0-9]+}}, 1, [[LOAD]]
; GCN: buffer_store_dwordx2
define amdgpu_kernel void @constant_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 {
%a = load i1, i1 addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(4)* nocapture %in) #0 {
%a = load i1, i1 addrspace(4)* %in
%ext = zext i1 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
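
An aside, not part of this commit: a minimal IR sketch of the lowering the GCN check lines above encode. Zero-extending a loaded i1 keeps only bit 0 of the loaded byte (the v_and_b32 with 1) and pairs it with an all-zero high word (the v_mov_b32 of 0). The function name below is hypothetical.

; Illustration only; not from the patched tests.
define i64 @zext_i1_sketch(i8 %loaded) {
  %bit = and i8 %loaded, 1    ; v_and_b32_e32 {{v[0-9]+}}, 1, [[LOAD]]
  %ext = zext i8 %bit to i64  ; high word is the v_mov_b32_e32 of 0
  ret i64 %ext                ; the pair written by buffer_store_dwordx2
}
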
@ -233,136 +233,136 @@ define amdgpu_kernel void @constant_zextload_i1_to_i64(i64 addrspace(1)* %out, i
; GCN: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}}
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]]
; GCN: buffer_store_dwordx2
define amdgpu_kernel void @constant_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 {
%a = load i1, i1 addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(4)* nocapture %in) #0 {
%a = load i1, i1 addrspace(4)* %in
%ext = sext i1 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
}
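
The sign-extending counterpart, again as a hedged sketch rather than test content: sext of an i1 is a one-bit signed bitfield extract (the v_bfe_i32 with width 1), and the high word is a sign broadcast (the v_ashrrev_i32 by 31). Names below are hypothetical.

; Illustration only; not from the patched tests.
define i64 @sext_i1_sketch(i8 %loaded) {
  %bit = trunc i8 %loaded to i1
  %lo = sext i1 %bit to i32     ; v_bfe_i32 [[BFE]], {{v[0-9]+}}, 0, 1
  %hi = ashr i32 %lo, 31        ; v_ashrrev_i32_e32 {{v[0-9]+}}, 31, [[BFE]]
  %lo64 = zext i32 %lo to i64
  %hi64 = zext i32 %hi to i64
  %hi.sh = shl i64 %hi64, 32
  %res = or i64 %hi.sh, %lo64   ; the two words of buffer_store_dwordx2
  ret i64 %res
}
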
; FUNC-LABEL: {{^}}constant_zextload_v1i1_to_v1i64:
define amdgpu_kernel void @constant_zextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <1 x i1>, <1 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <1 x i1>, <1 x i1> addrspace(4)* %in
%ext = zext <1 x i1> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_sextload_v1i1_to_v1i64:
define amdgpu_kernel void @constant_sextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <1 x i1>, <1 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <1 x i1>, <1 x i1> addrspace(4)* %in
%ext = sext <1 x i1> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v2i1_to_v2i64:
define amdgpu_kernel void @constant_zextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <2 x i1>, <2 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <2 x i1>, <2 x i1> addrspace(4)* %in
%ext = zext <2 x i1> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_sextload_v2i1_to_v2i64:
define amdgpu_kernel void @constant_sextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <2 x i1>, <2 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <2 x i1>, <2 x i1> addrspace(4)* %in
%ext = sext <2 x i1> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v3i1_to_v3i64:
define amdgpu_kernel void @constant_zextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <3 x i1>, <3 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <3 x i1>, <3 x i1> addrspace(4)* %in
%ext = zext <3 x i1> %load to <3 x i64>
store <3 x i64> %ext, <3 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_sextload_v3i1_to_v3i64:
define amdgpu_kernel void @constant_sextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <3 x i1>, <3 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <3 x i1>, <3 x i1> addrspace(4)* %in
%ext = sext <3 x i1> %load to <3 x i64>
store <3 x i64> %ext, <3 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v4i1_to_v4i64:
define amdgpu_kernel void @constant_zextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <4 x i1>, <4 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <4 x i1>, <4 x i1> addrspace(4)* %in
%ext = zext <4 x i1> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_sextload_v4i1_to_v4i64:
define amdgpu_kernel void @constant_sextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <4 x i1>, <4 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <4 x i1>, <4 x i1> addrspace(4)* %in
%ext = sext <4 x i1> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v8i1_to_v8i64:
define amdgpu_kernel void @constant_zextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <8 x i1>, <8 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <8 x i1>, <8 x i1> addrspace(4)* %in
%ext = zext <8 x i1> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_sextload_v8i1_to_v8i64:
define amdgpu_kernel void @constant_sextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <8 x i1>, <8 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <8 x i1>, <8 x i1> addrspace(4)* %in
%ext = sext <8 x i1> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v16i1_to_v16i64:
define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <16 x i1>, <16 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <16 x i1>, <16 x i1> addrspace(4)* %in
%ext = zext <16 x i1> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_sextload_v16i1_to_v16i64:
define amdgpu_kernel void @constant_sextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <16 x i1>, <16 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <16 x i1>, <16 x i1> addrspace(4)* %in
%ext = sext <16 x i1> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v32i1_to_v32i64:
define amdgpu_kernel void @constant_zextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <32 x i1>, <32 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <32 x i1>, <32 x i1> addrspace(4)* %in
%ext = zext <32 x i1> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_sextload_v32i1_to_v32i64:
define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <32 x i1>, <32 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <32 x i1>, <32 x i1> addrspace(4)* %in
%ext = sext <32 x i1> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v64i1_to_v64i64:
define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <64 x i1>, <64 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <64 x i1>, <64 x i1> addrspace(4)* %in
%ext = zext <64 x i1> %load to <64 x i64>
store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_sextload_v64i1_to_v64i64:
define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 {
%load = load <64 x i1>, <64 x i1> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(4)* nocapture %in) #0 {
%load = load <64 x i1>, <64 x i1> addrspace(4)* %in
%ext = sext <64 x i1> %load to <64 x i64>
store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
ret void
@ -8,9 +8,9 @@
; GCN-HSA: flat_load_ushort
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_i16(i16 addrspace(1)* %out, i16 addrspace(2)* %in) {
define amdgpu_kernel void @constant_load_i16(i16 addrspace(1)* %out, i16 addrspace(4)* %in) {
entry:
%ld = load i16, i16 addrspace(2)* %in
%ld = load i16, i16 addrspace(4)* %in
store i16 %ld, i16 addrspace(1)* %out
ret void
}
@ -19,9 +19,9 @@ entry:
; GCN: s_load_dword s
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) {
define amdgpu_kernel void @constant_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) {
entry:
%ld = load <2 x i16>, <2 x i16> addrspace(2)* %in
%ld = load <2 x i16>, <2 x i16> addrspace(4)* %in
store <2 x i16> %ld, <2 x i16> addrspace(1)* %out
ret void
}
@ -31,9 +31,9 @@ entry:
; EG-DAG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 4, #1
define amdgpu_kernel void @constant_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(2)* %in) {
define amdgpu_kernel void @constant_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) {
entry:
%ld = load <3 x i16>, <3 x i16> addrspace(2)* %in
%ld = load <3 x i16>, <3 x i16> addrspace(4)* %in
store <3 x i16> %ld, <3 x i16> addrspace(1)* %out
ret void
}
@ -42,9 +42,9 @@ entry:
; GCN: s_load_dwordx2
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) {
define amdgpu_kernel void @constant_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) {
entry:
%ld = load <4 x i16>, <4 x i16> addrspace(2)* %in
%ld = load <4 x i16>, <4 x i16> addrspace(4)* %in
store <4 x i16> %ld, <4 x i16> addrspace(1)* %out
ret void
}
@ -53,9 +53,9 @@ entry:
; GCN: s_load_dwordx4
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) {
define amdgpu_kernel void @constant_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) {
entry:
%ld = load <8 x i16>, <8 x i16> addrspace(2)* %in
%ld = load <8 x i16>, <8 x i16> addrspace(4)* %in
store <8 x i16> %ld, <8 x i16> addrspace(1)* %out
ret void
}
@ -65,9 +65,9 @@ entry:
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define amdgpu_kernel void @constant_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) {
define amdgpu_kernel void @constant_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) {
entry:
%ld = load <16 x i16>, <16 x i16> addrspace(2)* %in
%ld = load <16 x i16>, <16 x i16> addrspace(4)* %in
store <16 x i16> %ld, <16 x i16> addrspace(1)* %out
ret void
}
@ -80,8 +80,8 @@ entry:
; GCN-HSA: flat_store_dword
; EG: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}, 0, #1
define amdgpu_kernel void @constant_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
%a = load i16, i16 addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
%a = load i16, i16 addrspace(4)* %in
%ext = zext i16 %a to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
@ -97,8 +97,8 @@ define amdgpu_kernel void @constant_zextload_i16_to_i32(i32 addrspace(1)* %out,
; EG: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; EG: 16
define amdgpu_kernel void @constant_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
%a = load i16, i16 addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
%a = load i16, i16 addrspace(4)* %in
%ext = sext i16 %a to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
@ -109,8 +109,8 @@ define amdgpu_kernel void @constant_sextload_i16_to_i32(i32 addrspace(1)* %out,
; GCN-HSA: flat_load_ushort
; EG: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}, 0, #1
define amdgpu_kernel void @constant_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 {
%load = load <1 x i16>, <1 x i16> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
%load = load <1 x i16>, <1 x i16> addrspace(4)* %in
%ext = zext <1 x i16> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
ret void
@ -123,8 +123,8 @@ define amdgpu_kernel void @constant_zextload_v1i16_to_v1i32(<1 x i32> addrspace(
; EG: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; EG: 16
define amdgpu_kernel void @constant_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 {
%load = load <1 x i16>, <1 x i16> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
%load = load <1 x i16>, <1 x i16> addrspace(4)* %in
%ext = sext <1 x i16> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
ret void
@ -140,8 +140,8 @@ define amdgpu_kernel void @constant_sextload_v1i16_to_v1i32(<1 x i32> addrspace(
; EG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], literal
; EG: 16
; EG: 16
define amdgpu_kernel void @constant_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
%load = load <2 x i16>, <2 x i16> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 {
%load = load <2 x i16>, <2 x i16> addrspace(4)* %in
%ext = zext <2 x i16> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
ret void
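
Worth spelling out what the paired BFE_UINT/16 checks above mean: on EG a <2 x i16> constant load comes back as a single 32-bit word, and the two lanes are unpacked with 16-bit bitfield extracts. A hedged IR sketch (the function name is hypothetical):

; Illustration only; not from the patched tests.
define <2 x i32> @unpack_v2i16_sketch(i32 %word) {
  %lo = and i32 %word, 65535   ; BFE_UINT word, 0, 16 (low lane)
  %hi = lshr i32 %word, 16     ; BFE_UINT word, 16, 16 (high lane)
  %v0 = insertelement <2 x i32> undef, i32 %lo, i32 0
  %v1 = insertelement <2 x i32> %v0, i32 %hi, i32 1
  ret <2 x i32> %v1
}
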
@ -160,8 +160,8 @@ define amdgpu_kernel void @constant_zextload_v2i16_to_v2i32(<2 x i32> addrspace(
; EG-DAG: BFE_INT {{[* ]*}}[[ST]].Y, {{PV\.[XYZW]}}, 0.0, literal
; EG-DAG: 16
; EG-DAG: 16
define amdgpu_kernel void @constant_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
%load = load <2 x i16>, <2 x i16> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 {
%load = load <2 x i16>, <2 x i16> addrspace(4)* %in
%ext = sext <2 x i16> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
ret void
@ -183,9 +183,9 @@ define amdgpu_kernel void @constant_sextload_v2i16_to_v2i32(<2 x i32> addrspace(
; EG-DAG: AND_INT {{[* ]*}}[[ST_HI]].X, {{T[0-9]\.[XYZW]}}, literal
; EG-DAG: 65535
; EG-DAG: 65535
define amdgpu_kernel void @constant_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(2)* %in) {
define amdgpu_kernel void @constant_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) {
entry:
%ld = load <3 x i16>, <3 x i16> addrspace(2)* %in
%ld = load <3 x i16>, <3 x i16> addrspace(4)* %in
%ext = zext <3 x i16> %ld to <3 x i32>
store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
ret void
@ -204,9 +204,9 @@ entry:
; EG-DAG: BFE_INT {{[* ]*}}[[ST_HI]].X, {{T[0-9]\.[XYZW]}}, 0.0, literal
; EG-DAG: 16
; EG-DAG: 16
define amdgpu_kernel void @constant_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(2)* %in) {
define amdgpu_kernel void @constant_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) {
entry:
%ld = load <3 x i16>, <3 x i16> addrspace(2)* %in
%ld = load <3 x i16>, <3 x i16> addrspace(4)* %in
%ext = sext <3 x i16> %ld to <3 x i32>
store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
ret void
@ -229,8 +229,8 @@ entry:
; EG-DAG: AND_INT {{[* ]*}}[[ST]].Z, {{T[0-9]\.[XYZW]}}, literal
; EG-DAG: 65535
; EG-DAG: 65535
define amdgpu_kernel void @constant_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
%load = load <4 x i16>, <4 x i16> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 {
%load = load <4 x i16>, <4 x i16> addrspace(4)* %in
%ext = zext <4 x i16> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
ret void
@ -254,8 +254,8 @@ define amdgpu_kernel void @constant_zextload_v4i16_to_v4i32(<4 x i32> addrspace(
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
define amdgpu_kernel void @constant_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
%load = load <4 x i16>, <4 x i16> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 {
%load = load <4 x i16>, <4 x i16> addrspace(4)* %in
%ext = sext <4 x i16> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
ret void
@ -288,8 +288,8 @@ define amdgpu_kernel void @constant_sextload_v4i16_to_v4i32(<4 x i32> addrspace(
; EG-DAG: 65535
; EG-DAG: 65535
; EG-DAG: 65535
define amdgpu_kernel void @constant_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 {
%load = load <8 x i16>, <8 x i16> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 {
%load = load <8 x i16>, <8 x i16> addrspace(4)* %in
%ext = zext <8 x i16> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
ret void
@ -322,8 +322,8 @@ define amdgpu_kernel void @constant_zextload_v8i16_to_v8i32(<8 x i32> addrspace(
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
define amdgpu_kernel void @constant_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 {
%load = load <8 x i16>, <8 x i16> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 {
%load = load <8 x i16>, <8 x i16> addrspace(4)* %in
%ext = sext <8 x i16> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
ret void
@ -337,8 +337,8 @@ define amdgpu_kernel void @constant_sextload_v8i16_to_v8i32(<8 x i32> addrspace(
; v16i16 is naturally 32 byte aligned
; EG-DAG: VTX_READ_128 [[DST_HI:T[0-9]+\.XYZW]], {{T[0-9]+.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 [[DST_LO:T[0-9]+\.XYZW]], {{T[0-9]+.[XYZW]}}, 16, #1
define amdgpu_kernel void @constant_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 {
%load = load <16 x i16>, <16 x i16> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 {
%load = load <16 x i16>, <16 x i16> addrspace(4)* %in
%ext = zext <16 x i16> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
ret void
@ -352,8 +352,8 @@ define amdgpu_kernel void @constant_zextload_v16i16_to_v16i32(<16 x i32> addrspa
; v16i16 is naturally 32 byte aligned
; EG-DAG: VTX_READ_128 [[DST_HI:T[0-9]+\.XYZW]], {{T[0-9]+\.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 [[DST_LO:T[0-9]+\.XYZW]], {{T[0-9]+\.[XYZW]}}, 16, #1
define amdgpu_kernel void @constant_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 {
%load = load <16 x i16>, <16 x i16> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 {
%load = load <16 x i16>, <16 x i16> addrspace(4)* %in
%ext = sext <16 x i16> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
ret void
@ -369,8 +369,8 @@ define amdgpu_kernel void @constant_sextload_v16i16_to_v16i32(<16 x i32> addrspa
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 48, #1
define amdgpu_kernel void @constant_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 {
%load = load <32 x i16>, <32 x i16> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 {
%load = load <32 x i16>, <32 x i16> addrspace(4)* %in
%ext = zext <32 x i16> %load to <32 x i32>
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
ret void
@ -385,8 +385,8 @@ define amdgpu_kernel void @constant_zextload_v32i16_to_v32i32(<32 x i32> addrspa
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 48, #1
define amdgpu_kernel void @constant_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 {
%load = load <32 x i16>, <32 x i16> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 {
%load = load <32 x i16>, <32 x i16> addrspace(4)* %in
%ext = sext <32 x i16> %load to <32 x i32>
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
ret void
@ -404,8 +404,8 @@ define amdgpu_kernel void @constant_sextload_v32i16_to_v32i32(<32 x i32> addrspa
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 80, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 96, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 112, #1
define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 {
%load = load <64 x i16>, <64 x i16> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 {
%load = load <64 x i16>, <64 x i16> addrspace(4)* %in
%ext = zext <64 x i16> %load to <64 x i32>
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
ret void
@ -421,8 +421,8 @@ define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(<64 x i32> addrspa
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 80, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 96, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 112, #1
define amdgpu_kernel void @constant_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 {
%load = load <64 x i16>, <64 x i16> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 {
%load = load <64 x i16>, <64 x i16> addrspace(4)* %in
%ext = sext <64 x i16> %load to <64 x i32>
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
ret void
@ -438,8 +438,8 @@ define amdgpu_kernel void @constant_sextload_v64i16_to_v64i32(<64 x i32> addrspa
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
define amdgpu_kernel void @constant_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
%a = load i16, i16 addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
%a = load i16, i16 addrspace(4)* %in
%ext = zext i16 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
@ -464,8 +464,8 @@ define amdgpu_kernel void @constant_zextload_i16_to_i64(i64 addrspace(1)* %out,
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: These could be expanded earlier using ASHR 15
; EG: 31
define amdgpu_kernel void @constant_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
%a = load i16, i16 addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
%a = load i16, i16 addrspace(4)* %in
%ext = sext i16 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
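
The TODO above concerns where the sign broadcast happens: the current EG expansion widens the i16 first and then shifts the 32-bit value right by 31, but the same high word could be formed earlier with an arithmetic shift of the 16-bit value by 15. A hedged sketch of the two equivalent expansions (names hypothetical):

; Illustration only; both are equivalent to sext i16 -> i64.
define i64 @sext_i16_late(i16 %a) {
  %lo = sext i16 %a to i32    ; widen first...
  %hi = ashr i32 %lo, 31      ; ...then broadcast the sign (ASHR 31)
  %lo64 = zext i32 %lo to i64
  %hi64 = zext i32 %hi to i64
  %hi.sh = shl i64 %hi64, 32
  %res = or i64 %hi.sh, %lo64
  ret i64 %res
}

define i64 @sext_i16_early(i16 %a) {
  %sgn = ashr i16 %a, 15      ; the TODO's suggestion: ASHR 15 on the i16
  %hi = sext i16 %sgn to i32  ; already all sign bits
  %lo = sext i16 %a to i32
  %lo64 = zext i32 %lo to i64
  %hi64 = zext i32 %hi to i64
  %hi.sh = shl i64 %hi64, 32
  %res = or i64 %hi.sh, %lo64
  ret i64 %res
}
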
@ -475,8 +475,8 @@ define amdgpu_kernel void @constant_sextload_i16_to_i64(i64 addrspace(1)* %out,
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
define amdgpu_kernel void @constant_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 {
%load = load <1 x i16>, <1 x i16> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
%load = load <1 x i16>, <1 x i16> addrspace(4)* %in
%ext = zext <1 x i16> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
@ -488,8 +488,8 @@ define amdgpu_kernel void @constant_zextload_v1i16_to_v1i64(<1 x i64> addrspace(
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: These could be expanded earlier using ASHR 15
; EG: 31
define amdgpu_kernel void @constant_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 {
%load = load <1 x i16>, <1 x i16> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
%load = load <1 x i16>, <1 x i16> addrspace(4)* %in
%ext = sext <1 x i16> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
@ -498,8 +498,8 @@ define amdgpu_kernel void @constant_sextload_v1i16_to_v1i64(<1 x i64> addrspace(
; FUNC-LABEL: {{^}}constant_zextload_v2i16_to_v2i64:
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
%load = load <2 x i16>, <2 x i16> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 {
%load = load <2 x i16>, <2 x i16> addrspace(4)* %in
%ext = zext <2 x i16> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
@ -508,8 +508,8 @@ define amdgpu_kernel void @constant_zextload_v2i16_to_v2i64(<2 x i64> addrspace(
; FUNC-LABEL: {{^}}constant_sextload_v2i16_to_v2i64:
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
%load = load <2 x i16>, <2 x i16> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 {
%load = load <2 x i16>, <2 x i16> addrspace(4)* %in
%ext = sext <2 x i16> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
@ -518,8 +518,8 @@ define amdgpu_kernel void @constant_sextload_v2i16_to_v2i64(<2 x i64> addrspace(
; FUNC-LABEL: {{^}}constant_zextload_v4i16_to_v4i64:
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
%load = load <4 x i16>, <4 x i16> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 {
%load = load <4 x i16>, <4 x i16> addrspace(4)* %in
%ext = zext <4 x i16> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
@ -528,8 +528,8 @@ define amdgpu_kernel void @constant_zextload_v4i16_to_v4i64(<4 x i64> addrspace(
; FUNC-LABEL: {{^}}constant_sextload_v4i16_to_v4i64:
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
%load = load <4 x i16>, <4 x i16> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 {
%load = load <4 x i16>, <4 x i16> addrspace(4)* %in
%ext = sext <4 x i16> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
@ -538,8 +538,8 @@ define amdgpu_kernel void @constant_sextload_v4i16_to_v4i64(<4 x i64> addrspace(
; FUNC-LABEL: {{^}}constant_zextload_v8i16_to_v8i64:
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 {
%load = load <8 x i16>, <8 x i16> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 {
%load = load <8 x i16>, <8 x i16> addrspace(4)* %in
%ext = zext <8 x i16> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
@ -548,8 +548,8 @@ define amdgpu_kernel void @constant_zextload_v8i16_to_v8i64(<8 x i64> addrspace(
; FUNC-LABEL: {{^}}constant_sextload_v8i16_to_v8i64:
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 {
%load = load <8 x i16>, <8 x i16> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 {
%load = load <8 x i16>, <8 x i16> addrspace(4)* %in
%ext = sext <8 x i16> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
@ -559,8 +559,8 @@ define amdgpu_kernel void @constant_sextload_v8i16_to_v8i64(<8 x i64> addrspace(
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define amdgpu_kernel void @constant_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 {
%load = load <16 x i16>, <16 x i16> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 {
%load = load <16 x i16>, <16 x i16> addrspace(4)* %in
%ext = zext <16 x i16> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
@ -570,8 +570,8 @@ define amdgpu_kernel void @constant_zextload_v16i16_to_v16i64(<16 x i64> addrspa
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 {
%load = load <16 x i16>, <16 x i16> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 {
%load = load <16 x i16>, <16 x i16> addrspace(4)* %in
%ext = sext <16 x i16> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
@ -583,8 +583,8 @@ define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(<16 x i64> addrspa
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 48, #1
define amdgpu_kernel void @constant_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 {
%load = load <32 x i16>, <32 x i16> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 {
%load = load <32 x i16>, <32 x i16> addrspace(4)* %in
%ext = zext <32 x i16> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
ret void
@ -596,8 +596,8 @@ define amdgpu_kernel void @constant_zextload_v32i16_to_v32i64(<32 x i64> addrspa
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 48, #1
define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 {
%load = load <32 x i16>, <32 x i16> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 {
%load = load <32 x i16>, <32 x i16> addrspace(4)* %in
%ext = sext <32 x i16> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
ret void
@ -606,16 +606,16 @@ define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(<32 x i64> addrspa
; These trigger undefined register machine verifier errors
; ; XFUNC-LABEL: {{^}}constant_zextload_v64i16_to_v64i64:
; define amdgpu_kernel void @constant_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 {
; %load = load <64 x i16>, <64 x i16> addrspace(2)* %in
; define amdgpu_kernel void @constant_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 {
; %load = load <64 x i16>, <64 x i16> addrspace(4)* %in
; %ext = zext <64 x i16> %load to <64 x i64>
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
; ret void
; }
; ; XFUNC-LABEL: {{^}}constant_sextload_v64i16_to_v64i64:
; define amdgpu_kernel void @constant_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 {
; %load = load <64 x i16>, <64 x i16> addrspace(2)* %in
; define amdgpu_kernel void @constant_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 {
; %load = load <64 x i16>, <64 x i16> addrspace(4)* %in
; %ext = sext <64 x i16> %load to <64 x i64>
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
; ret void
@ -7,9 +7,9 @@
; GCN: s_load_dword s{{[0-9]+}}
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
define amdgpu_kernel void @constant_load_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) #0 {
define amdgpu_kernel void @constant_load_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) #0 {
entry:
%ld = load i32, i32 addrspace(2)* %in
%ld = load i32, i32 addrspace(4)* %in
store i32 %ld, i32 addrspace(1)* %out
ret void
}
@ -18,9 +18,9 @@ entry:
; GCN: s_load_dwordx2
; EG: VTX_READ_64
define amdgpu_kernel void @constant_load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(2)* %in) #0 {
define amdgpu_kernel void @constant_load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(4)* %in) #0 {
entry:
%ld = load <2 x i32>, <2 x i32> addrspace(2)* %in
%ld = load <2 x i32>, <2 x i32> addrspace(4)* %in
store <2 x i32> %ld, <2 x i32> addrspace(1)* %out
ret void
}
@ -29,9 +29,9 @@ entry:
; GCN: s_load_dwordx4
; EG: VTX_READ_128
define amdgpu_kernel void @constant_load_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(2)* %in) #0 {
define amdgpu_kernel void @constant_load_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(4)* %in) #0 {
entry:
%ld = load <3 x i32>, <3 x i32> addrspace(2)* %in
%ld = load <3 x i32>, <3 x i32> addrspace(4)* %in
store <3 x i32> %ld, <3 x i32> addrspace(1)* %out
ret void
}
@ -40,9 +40,9 @@ entry:
; GCN: s_load_dwordx4
; EG: VTX_READ_128
define amdgpu_kernel void @constant_load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(2)* %in) #0 {
define amdgpu_kernel void @constant_load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(4)* %in) #0 {
entry:
%ld = load <4 x i32>, <4 x i32> addrspace(2)* %in
%ld = load <4 x i32>, <4 x i32> addrspace(4)* %in
store <4 x i32> %ld, <4 x i32> addrspace(1)* %out
ret void
}
@ -52,9 +52,9 @@ entry:
; EG: VTX_READ_128
; EG: VTX_READ_128
define amdgpu_kernel void @constant_load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(2)* %in) #0 {
define amdgpu_kernel void @constant_load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(4)* %in) #0 {
entry:
%ld = load <8 x i32>, <8 x i32> addrspace(2)* %in
%ld = load <8 x i32>, <8 x i32> addrspace(4)* %in
store <8 x i32> %ld, <8 x i32> addrspace(1)* %out
ret void
}
@ -66,9 +66,9 @@ entry:
; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
define amdgpu_kernel void @constant_load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(2)* %in) #0 {
define amdgpu_kernel void @constant_load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(4)* %in) #0 {
entry:
%ld = load <16 x i32>, <16 x i32> addrspace(2)* %in
%ld = load <16 x i32>, <16 x i32> addrspace(4)* %in
store <16 x i32> %ld, <16 x i32> addrspace(1)* %out
ret void
}
@ -81,8 +81,8 @@ entry:
; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
; EG: CF_END
; EG: VTX_READ_32
define amdgpu_kernel void @constant_zextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(2)* %in) #0 {
%ld = load i32, i32 addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(4)* %in) #0 {
%ld = load i32, i32 addrspace(4)* %in
%ext = zext i32 %ld to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
@ -98,8 +98,8 @@ define amdgpu_kernel void @constant_zextload_i32_to_i64(i64 addrspace(1)* %out,
; EG: VTX_READ_32
; EG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.
; EG: 31
define amdgpu_kernel void @constant_sextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(2)* %in) #0 {
%ld = load i32, i32 addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(4)* %in) #0 {
%ld = load i32, i32 addrspace(4)* %in
%ext = sext i32 %ld to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
@ -108,8 +108,8 @@ define amdgpu_kernel void @constant_sextload_i32_to_i64(i64 addrspace(1)* %out,
; FUNC-LABEL: {{^}}constant_zextload_v1i32_to_v1i64:
; GCN: s_load_dword
; GCN: store_dwordx2
define amdgpu_kernel void @constant_zextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(2)* %in) #0 {
%ld = load <1 x i32>, <1 x i32> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(4)* %in) #0 {
%ld = load <1 x i32>, <1 x i32> addrspace(4)* %in
%ext = zext <1 x i32> %ld to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
@ -119,8 +119,8 @@ define amdgpu_kernel void @constant_zextload_v1i32_to_v1i64(<1 x i64> addrspace(
; GCN: s_load_dword s[[LO:[0-9]+]]
; GCN: s_ashr_i32 s[[HI:[0-9]+]], s[[LO]], 31
; GCN: store_dwordx2
define amdgpu_kernel void @constant_sextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(2)* %in) #0 {
%ld = load <1 x i32>, <1 x i32> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(4)* %in) #0 {
%ld = load <1 x i32>, <1 x i32> addrspace(4)* %in
%ext = sext <1 x i32> %ld to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
@ -129,8 +129,8 @@ define amdgpu_kernel void @constant_sextload_v1i32_to_v1i64(<1 x i64> addrspace(
; FUNC-LABEL: {{^}}constant_zextload_v2i32_to_v2i64:
; GCN: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; GCN: store_dwordx4
define amdgpu_kernel void @constant_zextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(2)* %in) #0 {
%ld = load <2 x i32>, <2 x i32> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(4)* %in) #0 {
%ld = load <2 x i32>, <2 x i32> addrspace(4)* %in
%ext = zext <2 x i32> %ld to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
@ -143,8 +143,8 @@ define amdgpu_kernel void @constant_zextload_v2i32_to_v2i64(<2 x i64> addrspace(
; GCN-DAG: s_ashr_i32
; GCN: store_dwordx4
define amdgpu_kernel void @constant_sextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(2)* %in) #0 {
%ld = load <2 x i32>, <2 x i32> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(4)* %in) #0 {
%ld = load <2 x i32>, <2 x i32> addrspace(4)* %in
%ext = sext <2 x i32> %ld to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
@ -155,8 +155,8 @@ define amdgpu_kernel void @constant_sextload_v2i32_to_v2i64(<2 x i64> addrspace(
; GCN: store_dwordx4
; GCN: store_dwordx4
define amdgpu_kernel void @constant_zextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(2)* %in) #0 {
%ld = load <4 x i32>, <4 x i32> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(4)* %in) #0 {
%ld = load <4 x i32>, <4 x i32> addrspace(4)* %in
%ext = zext <4 x i32> %ld to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
@ -172,8 +172,8 @@ define amdgpu_kernel void @constant_zextload_v4i32_to_v4i64(<4 x i64> addrspace(
; GCN: store_dwordx4
; GCN: store_dwordx4
define amdgpu_kernel void @constant_sextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(2)* %in) #0 {
%ld = load <4 x i32>, <4 x i32> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(4)* %in) #0 {
%ld = load <4 x i32>, <4 x i32> addrspace(4)* %in
%ext = sext <4 x i32> %ld to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
@ -191,8 +191,8 @@ define amdgpu_kernel void @constant_sextload_v4i32_to_v4i64(<4 x i64> addrspace(
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
define amdgpu_kernel void @constant_zextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(2)* %in) #0 {
%ld = load <8 x i32>, <8 x i32> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(4)* %in) #0 {
%ld = load <8 x i32>, <8 x i32> addrspace(4)* %in
%ext = zext <8 x i32> %ld to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
@ -219,8 +219,8 @@ define amdgpu_kernel void @constant_zextload_v8i32_to_v8i64(<8 x i64> addrspace(
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
define amdgpu_kernel void @constant_sextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(2)* %in) #0 {
%ld = load <8 x i32>, <8 x i32> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(4)* %in) #0 {
%ld = load <8 x i32>, <8 x i32> addrspace(4)* %in
%ext = sext <8 x i32> %ld to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
@ -240,8 +240,8 @@ define amdgpu_kernel void @constant_sextload_v8i32_to_v8i64(<8 x i64> addrspace(
; GCN: store_dwordx4
; GCN: store_dwordx4
; GCN: store_dwordx4
define amdgpu_kernel void @constant_sextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(2)* %in) #0 {
%ld = load <16 x i32>, <16 x i32> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(4)* %in) #0 {
%ld = load <16 x i32>, <16 x i32> addrspace(4)* %in
%ext = sext <16 x i32> %ld to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
@ -267,8 +267,8 @@ define amdgpu_kernel void @constant_sextload_v16i32_to_v16i64(<16 x i64> addrspa
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
define amdgpu_kernel void @constant_zextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(2)* %in) #0 {
%ld = load <16 x i32>, <16 x i32> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(4)* %in) #0 {
%ld = load <16 x i32>, <16 x i32> addrspace(4)* %in
%ext = zext <16 x i32> %ld to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
@ -319,8 +319,8 @@ define amdgpu_kernel void @constant_zextload_v16i32_to_v16i64(<16 x i64> addrspa
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
define amdgpu_kernel void @constant_sextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(2)* %in) #0 {
%ld = load <32 x i32>, <32 x i32> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(4)* %in) #0 {
%ld = load <32 x i32>, <32 x i32> addrspace(4)* %in
%ext = sext <32 x i32> %ld to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
ret void
@ -370,8 +370,8 @@ define amdgpu_kernel void @constant_sextload_v32i32_to_v32i64(<32 x i64> addrspa
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
define amdgpu_kernel void @constant_zextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(2)* %in) #0 {
%ld = load <32 x i32>, <32 x i32> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(4)* %in) #0 {
%ld = load <32 x i32>, <32 x i32> addrspace(4)* %in
%ext = zext <32 x i32> %ld to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
ret void
@ -7,8 +7,8 @@
; FUNC-LABEL: {{^}}constant_load_i64:
; GCN: s_load_dwordx2 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; EG: VTX_READ_64
define amdgpu_kernel void @constant_load_i64(i64 addrspace(1)* %out, i64 addrspace(2)* %in) #0 {
%ld = load i64, i64 addrspace(2)* %in
define amdgpu_kernel void @constant_load_i64(i64 addrspace(1)* %out, i64 addrspace(4)* %in) #0 {
%ld = load i64, i64 addrspace(4)* %in
store i64 %ld, i64 addrspace(1)* %out
ret void
}
@ -17,9 +17,9 @@ define amdgpu_kernel void @constant_load_i64(i64 addrspace(1)* %out, i64 addrspa
; GCN: s_load_dwordx4
; EG: VTX_READ_128
define amdgpu_kernel void @constant_load_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(2)* %in) #0 {
define amdgpu_kernel void @constant_load_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(4)* %in) #0 {
entry:
%ld = load <2 x i64>, <2 x i64> addrspace(2)* %in
%ld = load <2 x i64>, <2 x i64> addrspace(4)* %in
store <2 x i64> %ld, <2 x i64> addrspace(1)* %out
ret void
}
@ -29,9 +29,9 @@ entry:
; EG-DAG: VTX_READ_128
; EG-DAG: VTX_READ_128
define amdgpu_kernel void @constant_load_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> addrspace(2)* %in) #0 {
define amdgpu_kernel void @constant_load_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> addrspace(4)* %in) #0 {
entry:
%ld = load <3 x i64>, <3 x i64> addrspace(2)* %in
%ld = load <3 x i64>, <3 x i64> addrspace(4)* %in
store <3 x i64> %ld, <3 x i64> addrspace(1)* %out
ret void
}
@ -41,9 +41,9 @@ entry:
; EG: VTX_READ_128
; EG: VTX_READ_128
define amdgpu_kernel void @constant_load_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(2)* %in) #0 {
define amdgpu_kernel void @constant_load_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(4)* %in) #0 {
entry:
%ld = load <4 x i64>, <4 x i64> addrspace(2)* %in
%ld = load <4 x i64>, <4 x i64> addrspace(4)* %in
store <4 x i64> %ld, <4 x i64> addrspace(1)* %out
ret void
}
@ -55,9 +55,9 @@ entry:
; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
define amdgpu_kernel void @constant_load_v8i64(<8 x i64> addrspace(1)* %out, <8 x i64> addrspace(2)* %in) #0 {
define amdgpu_kernel void @constant_load_v8i64(<8 x i64> addrspace(1)* %out, <8 x i64> addrspace(4)* %in) #0 {
entry:
%ld = load <8 x i64>, <8 x i64> addrspace(2)* %in
%ld = load <8 x i64>, <8 x i64> addrspace(4)* %in
store <8 x i64> %ld, <8 x i64> addrspace(1)* %out
ret void
}
@ -74,9 +74,9 @@ entry:
; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
define amdgpu_kernel void @constant_load_v16i64(<16 x i64> addrspace(1)* %out, <16 x i64> addrspace(2)* %in) #0 {
define amdgpu_kernel void @constant_load_v16i64(<16 x i64> addrspace(1)* %out, <16 x i64> addrspace(4)* %in) #0 {
entry:
%ld = load <16 x i64>, <16 x i64> addrspace(2)* %in
%ld = load <16 x i64>, <16 x i64> addrspace(4)* %in
store <16 x i64> %ld, <16 x i64> addrspace(1)* %out
ret void
}
@ -10,9 +10,9 @@
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; TODO: NOT AND
define amdgpu_kernel void @constant_load_i8(i8 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
define amdgpu_kernel void @constant_load_i8(i8 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
entry:
%ld = load i8, i8 addrspace(2)* %in
%ld = load i8, i8 addrspace(4)* %in
store i8 %ld, i8 addrspace(1)* %out
ret void
}
@ -22,9 +22,9 @@ entry:
; GCN-HSA: flat_load_ushort v
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
define amdgpu_kernel void @constant_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
entry:
%ld = load <2 x i8>, <2 x i8> addrspace(2)* %in
%ld = load <2 x i8>, <2 x i8> addrspace(4)* %in
store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
ret void
}
@ -33,9 +33,9 @@ entry:
; GCN: s_load_dword s
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 {
define amdgpu_kernel void @constant_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
entry:
%ld = load <3 x i8>, <3 x i8> addrspace(2)* %in
%ld = load <3 x i8>, <3 x i8> addrspace(4)* %in
store <3 x i8> %ld, <3 x i8> addrspace(1)* %out
ret void
}
@ -44,9 +44,9 @@ entry:
; GCN: s_load_dword s
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
define amdgpu_kernel void @constant_load_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
entry:
%ld = load <4 x i8>, <4 x i8> addrspace(2)* %in
%ld = load <4 x i8>, <4 x i8> addrspace(4)* %in
store <4 x i8> %ld, <4 x i8> addrspace(1)* %out
ret void
}
@ -55,9 +55,9 @@ entry:
; GCN: s_load_dwordx2
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
define amdgpu_kernel void @constant_load_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
entry:
%ld = load <8 x i8>, <8 x i8> addrspace(2)* %in
%ld = load <8 x i8>, <8 x i8> addrspace(4)* %in
store <8 x i8> %ld, <8 x i8> addrspace(1)* %out
ret void
}
@ -66,9 +66,9 @@ entry:
; GCN: s_load_dwordx4
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
define amdgpu_kernel void @constant_load_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
entry:
%ld = load <16 x i8>, <16 x i8> addrspace(2)* %in
%ld = load <16 x i8>, <16 x i8> addrspace(4)* %in
store <16 x i8> %ld, <16 x i8> addrspace(1)* %out
ret void
}
@ -78,8 +78,8 @@ entry:
; GCN-HSA: flat_load_ubyte
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
%a = load i8, i8 addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
%a = load i8, i8 addrspace(4)* %in
%ext = zext i8 %a to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
@ -92,8 +92,8 @@ define amdgpu_kernel void @constant_zextload_i8_to_i32(i32 addrspace(1)* %out, i
; EG: VTX_READ_8 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; EG: 8
define amdgpu_kernel void @constant_sextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
%ld = load i8, i8 addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
%ld = load i8, i8 addrspace(4)* %in
%ext = sext i8 %ld to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
@ -102,8 +102,8 @@ define amdgpu_kernel void @constant_sextload_i8_to_i32(i32 addrspace(1)* %out, i
; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i32:
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
%load = load <1 x i8>, <1 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
%load = load <1 x i8>, <1 x i8> addrspace(4)* %in
%ext = zext <1 x i8> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
ret void
@ -114,8 +114,8 @@ define amdgpu_kernel void @constant_zextload_v1i8_to_v1i32(<1 x i32> addrspace(1
; EG: VTX_READ_8 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; EG: 8
define amdgpu_kernel void @constant_sextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
%load = load <1 x i8>, <1 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
%load = load <1 x i8>, <1 x i8> addrspace(4)* %in
%ext = sext <1 x i8> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
ret void
@ -129,8 +129,8 @@ define amdgpu_kernel void @constant_sextload_v1i8_to_v1i32(<1 x i32> addrspace(1
; TODO: This should use DST, but for some reason there are redundant MOVs
; EG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG: 8
define amdgpu_kernel void @constant_zextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
%load = load <2 x i8>, <2 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
%load = load <2 x i8>, <2 x i8> addrspace(4)* %in
%ext = zext <2 x i8> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
ret void
@ -150,8 +150,8 @@ define amdgpu_kernel void @constant_zextload_v2i8_to_v2i32(<2 x i32> addrspace(1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_sextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
%load = load <2 x i8>, <2 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
%load = load <2 x i8>, <2 x i8> addrspace(4)* %in
%ext = sext <2 x i8> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
ret void
@ -170,9 +170,9 @@ define amdgpu_kernel void @constant_sextload_v2i8_to_v2i32(<2 x i32> addrspace(1
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_zextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 {
define amdgpu_kernel void @constant_zextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
entry:
%ld = load <3 x i8>, <3 x i8> addrspace(2)* %in
%ld = load <3 x i8>, <3 x i8> addrspace(4)* %in
%ext = zext <3 x i8> %ld to <3 x i32>
store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
ret void
@ -193,9 +193,9 @@ entry:
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_sextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 {
define amdgpu_kernel void @constant_sextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
entry:
%ld = load <3 x i8>, <3 x i8> addrspace(2)* %in
%ld = load <3 x i8>, <3 x i8> addrspace(4)* %in
%ext = sext <3 x i8> %ld to <3 x i32>
store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
ret void
@ -214,8 +214,8 @@ entry:
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_zextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
%load = load <4 x i8>, <4 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
%load = load <4 x i8>, <4 x i8> addrspace(4)* %in
%ext = zext <4 x i8> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
ret void
@ -236,8 +236,8 @@ define amdgpu_kernel void @constant_zextload_v4i8_to_v4i32(<4 x i32> addrspace(1
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_sextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
%load = load <4 x i8>, <4 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
%load = load <4 x i8>, <4 x i8> addrspace(4)* %in
%ext = sext <4 x i8> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
ret void
@ -264,8 +264,8 @@ define amdgpu_kernel void @constant_sextload_v4i8_to_v4i32(<4 x i32> addrspace(1
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_zextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
%load = load <8 x i8>, <8 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
%load = load <8 x i8>, <8 x i8> addrspace(4)* %in
%ext = zext <8 x i8> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
ret void
@ -294,8 +294,8 @@ define amdgpu_kernel void @constant_zextload_v8i8_to_v8i32(<8 x i32> addrspace(1
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_sextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
%load = load <8 x i8>, <8 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
%load = load <8 x i8>, <8 x i8> addrspace(4)* %in
%ext = sext <8 x i8> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
ret void
@ -335,8 +335,8 @@ define amdgpu_kernel void @constant_sextload_v8i8_to_v8i32(<8 x i32> addrspace(1
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_zextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
%load = load <16 x i8>, <16 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
%load = load <16 x i8>, <16 x i8> addrspace(4)* %in
%ext = zext <16 x i8> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
ret void
@ -378,8 +378,8 @@ define amdgpu_kernel void @constant_zextload_v16i8_to_v16i32(<16 x i32> addrspac
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_sextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
%load = load <16 x i8>, <16 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
%load = load <16 x i8>, <16 x i8> addrspace(4)* %in
%ext = sext <16 x i8> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
ret void
@ -450,8 +450,8 @@ define amdgpu_kernel void @constant_sextload_v16i8_to_v16i32(<16 x i32> addrspac
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_zextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
%load = load <32 x i8>, <32 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
%load = load <32 x i8>, <32 x i8> addrspace(4)* %in
%ext = zext <32 x i8> %load to <32 x i32>
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
ret void
@ -526,8 +526,8 @@ define amdgpu_kernel void @constant_zextload_v32i8_to_v32i32(<32 x i32> addrspac
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_sextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
%load = load <32 x i8>, <32 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
%load = load <32 x i8>, <32 x i8> addrspace(4)* %in
%ext = sext <32 x i8> %load to <32 x i32>
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
ret void
@ -539,8 +539,8 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i32(<32 x i32> addrspac
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 48, #1
define amdgpu_kernel void @constant_zextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
%load = load <64 x i8>, <64 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
%load = load <64 x i8>, <64 x i8> addrspace(4)* %in
%ext = zext <64 x i8> %load to <64 x i32>
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
ret void
@ -552,8 +552,8 @@ define amdgpu_kernel void @constant_zextload_v64i8_to_v64i32(<64 x i32> addrspac
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 48, #1
define amdgpu_kernel void @constant_sextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
%load = load <64 x i8>, <64 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
%load = load <64 x i8>, <64 x i8> addrspace(4)* %in
%ext = sext <64 x i8> %load to <64 x i32>
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
ret void
@ -570,8 +570,8 @@ define amdgpu_kernel void @constant_sextload_v64i8_to_v64i32(<64 x i32> addrspac
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
define amdgpu_kernel void @constant_zextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
%a = load i8, i8 addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
%a = load i8, i8 addrspace(4)* %in
%ext = zext i8 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
@ -589,8 +589,8 @@ define amdgpu_kernel void @constant_zextload_i8_to_i64(i64 addrspace(1)* %out, i
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: Why not 7?
; EG: 31
define amdgpu_kernel void @constant_sextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
%a = load i8, i8 addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
%a = load i8, i8 addrspace(4)* %in
%ext = sext i8 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
@ -600,8 +600,8 @@ define amdgpu_kernel void @constant_sextload_i8_to_i64(i64 addrspace(1)* %out, i
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
define amdgpu_kernel void @constant_zextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
%load = load <1 x i8>, <1 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
%load = load <1 x i8>, <1 x i8> addrspace(4)* %in
%ext = zext <1 x i8> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
@ -613,8 +613,8 @@ define amdgpu_kernel void @constant_zextload_v1i8_to_v1i64(<1 x i64> addrspace(1
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: Why not 7?
; EG: 31
define amdgpu_kernel void @constant_sextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
%load = load <1 x i8>, <1 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
%load = load <1 x i8>, <1 x i8> addrspace(4)* %in
%ext = sext <1 x i8> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
@ -623,8 +623,8 @@ define amdgpu_kernel void @constant_sextload_v1i8_to_v1i64(<1 x i64> addrspace(1
; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i64:
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
%load = load <2 x i8>, <2 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
%load = load <2 x i8>, <2 x i8> addrspace(4)* %in
%ext = zext <2 x i8> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
@ -633,8 +633,8 @@ define amdgpu_kernel void @constant_zextload_v2i8_to_v2i64(<2 x i64> addrspace(1
; FUNC-LABEL: {{^}}constant_sextload_v2i8_to_v2i64:
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_sextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
%load = load <2 x i8>, <2 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
%load = load <2 x i8>, <2 x i8> addrspace(4)* %in
%ext = sext <2 x i8> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
@ -643,8 +643,8 @@ define amdgpu_kernel void @constant_sextload_v2i8_to_v2i64(<2 x i64> addrspace(1
; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i64:
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
%load = load <4 x i8>, <4 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
%load = load <4 x i8>, <4 x i8> addrspace(4)* %in
%ext = zext <4 x i8> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
@ -653,8 +653,8 @@ define amdgpu_kernel void @constant_zextload_v4i8_to_v4i64(<4 x i64> addrspace(1
; FUNC-LABEL: {{^}}constant_sextload_v4i8_to_v4i64:
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_sextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
%load = load <4 x i8>, <4 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
%load = load <4 x i8>, <4 x i8> addrspace(4)* %in
%ext = sext <4 x i8> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
@ -663,8 +663,8 @@ define amdgpu_kernel void @constant_sextload_v4i8_to_v4i64(<4 x i64> addrspace(1
; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i64:
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
%load = load <8 x i8>, <8 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
%load = load <8 x i8>, <8 x i8> addrspace(4)* %in
%ext = zext <8 x i8> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
@ -673,8 +673,8 @@ define amdgpu_kernel void @constant_zextload_v8i8_to_v8i64(<8 x i64> addrspace(1
; FUNC-LABEL: {{^}}constant_sextload_v8i8_to_v8i64:
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_sextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
%load = load <8 x i8>, <8 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
%load = load <8 x i8>, <8 x i8> addrspace(4)* %in
%ext = sext <8 x i8> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
@ -683,8 +683,8 @@ define amdgpu_kernel void @constant_sextload_v8i8_to_v8i64(<8 x i64> addrspace(1
; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i64:
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
%load = load <16 x i8>, <16 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
%load = load <16 x i8>, <16 x i8> addrspace(4)* %in
%ext = zext <16 x i8> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
@ -693,8 +693,8 @@ define amdgpu_kernel void @constant_zextload_v16i8_to_v16i64(<16 x i64> addrspac
; FUNC-LABEL: {{^}}constant_sextload_v16i8_to_v16i64:
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_sextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
%load = load <16 x i8>, <16 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
%load = load <16 x i8>, <16 x i8> addrspace(4)* %in
%ext = sext <16 x i8> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
@ -704,8 +704,8 @@ define amdgpu_kernel void @constant_sextload_v16i8_to_v16i64(<16 x i64> addrspac
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define amdgpu_kernel void @constant_zextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
%load = load <32 x i8>, <32 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
%load = load <32 x i8>, <32 x i8> addrspace(4)* %in
%ext = zext <32 x i8> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
ret void
@ -715,24 +715,24 @@ define amdgpu_kernel void @constant_zextload_v32i8_to_v32i64(<32 x i64> addrspac
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
%load = load <32 x i8>, <32 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
%load = load <32 x i8>, <32 x i8> addrspace(4)* %in
%ext = sext <32 x i8> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
ret void
}
; XFUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i64:
; define amdgpu_kernel void @constant_zextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
; %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
; define amdgpu_kernel void @constant_zextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
; %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
; %ext = zext <64 x i8> %load to <64 x i64>
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
; ret void
; }
; XFUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i64:
; define amdgpu_kernel void @constant_sextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
; %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
; define amdgpu_kernel void @constant_sextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
; %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
; %ext = sext <64 x i8> %load to <64 x i64>
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
; ret void
@ -744,8 +744,8 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(<32 x i64> addrspac
; GCN-HSA: flat_load_ubyte v[[VAL:[0-9]+]],
; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
define amdgpu_kernel void @constant_zextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
%a = load i8, i8 addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
%a = load i8, i8 addrspace(4)* %in
%ext = zext i8 %a to i16
store i16 %ext, i16 addrspace(1)* %out
ret void
@ -759,16 +759,16 @@ define amdgpu_kernel void @constant_zextload_i8_to_i16(i16 addrspace(1)* %out, i
; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_sextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
%a = load i8, i8 addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
%a = load i8, i8 addrspace(4)* %in
%ext = sext i8 %a to i16
store i16 %ext, i16 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i16:
define amdgpu_kernel void @constant_zextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
%load = load <1 x i8>, <1 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
%load = load <1 x i8>, <1 x i8> addrspace(4)* %in
%ext = zext <1 x i8> %load to <1 x i16>
store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
ret void
@ -778,8 +778,8 @@ define amdgpu_kernel void @constant_zextload_v1i8_to_v1i16(<1 x i16> addrspace(1
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @constant_sextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
%load = load <1 x i8>, <1 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
%load = load <1 x i8>, <1 x i8> addrspace(4)* %in
%ext = sext <1 x i8> %load to <1 x i16>
store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
ret void
@ -788,8 +788,8 @@ define amdgpu_kernel void @constant_sextload_v1i8_to_v1i16(<1 x i16> addrspace(1
; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i16:
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
%load = load <2 x i8>, <2 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
%load = load <2 x i8>, <2 x i8> addrspace(4)* %in
%ext = zext <2 x i8> %load to <2 x i16>
store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
ret void
@ -800,8 +800,8 @@ define amdgpu_kernel void @constant_zextload_v2i8_to_v2i16(<2 x i16> addrspace(1
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @constant_sextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
%load = load <2 x i8>, <2 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
%load = load <2 x i8>, <2 x i8> addrspace(4)* %in
%ext = sext <2 x i8> %load to <2 x i16>
store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
ret void
@ -810,8 +810,8 @@ define amdgpu_kernel void @constant_sextload_v2i8_to_v2i16(<2 x i16> addrspace(1
; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i16:
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
%load = load <4 x i8>, <4 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
%load = load <4 x i8>, <4 x i8> addrspace(4)* %in
%ext = zext <4 x i8> %load to <4 x i16>
store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
ret void
@ -824,8 +824,8 @@ define amdgpu_kernel void @constant_zextload_v4i8_to_v4i16(<4 x i16> addrspace(1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @constant_sextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
%load = load <4 x i8>, <4 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
%load = load <4 x i8>, <4 x i8> addrspace(4)* %in
%ext = sext <4 x i8> %load to <4 x i16>
store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
ret void
@ -834,8 +834,8 @@ define amdgpu_kernel void @constant_sextload_v4i8_to_v4i16(<4 x i16> addrspace(1
; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i16:
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
%load = load <8 x i8>, <8 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
%load = load <8 x i8>, <8 x i8> addrspace(4)* %in
%ext = zext <8 x i8> %load to <8 x i16>
store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
ret void
@ -853,8 +853,8 @@ define amdgpu_kernel void @constant_zextload_v8i8_to_v8i16(<8 x i16> addrspace(1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @constant_sextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
%load = load <8 x i8>, <8 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
%load = load <8 x i8>, <8 x i8> addrspace(4)* %in
%ext = sext <8 x i8> %load to <8 x i16>
store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
ret void
@ -863,8 +863,8 @@ define amdgpu_kernel void @constant_sextload_v8i8_to_v8i16(<8 x i16> addrspace(1
; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i16:
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
%load = load <16 x i8>, <16 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
%load = load <16 x i8>, <16 x i8> addrspace(4)* %in
%ext = zext <16 x i8> %load to <16 x i16>
store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
ret void
@ -889,8 +889,8 @@ define amdgpu_kernel void @constant_zextload_v16i8_to_v16i16(<16 x i16> addrspac
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @constant_sextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
%load = load <16 x i8>, <16 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
%load = load <16 x i8>, <16 x i8> addrspace(4)* %in
%ext = sext <16 x i8> %load to <16 x i16>
store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
ret void
@ -900,8 +900,8 @@ define amdgpu_kernel void @constant_sextload_v16i8_to_v16i16(<16 x i16> addrspac
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define amdgpu_kernel void @constant_zextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
%load = load <32 x i8>, <32 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_zextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
%load = load <32 x i8>, <32 x i8> addrspace(4)* %in
%ext = zext <32 x i8> %load to <32 x i16>
store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
ret void
@ -943,24 +943,24 @@ define amdgpu_kernel void @constant_zextload_v32i8_to_v32i16(<32 x i16> addrspac
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @constant_sextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
%load = load <32 x i8>, <32 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_sextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
%load = load <32 x i8>, <32 x i8> addrspace(4)* %in
%ext = sext <32 x i8> %load to <32 x i16>
store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
ret void
}
; XFUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i16:
; define amdgpu_kernel void @constant_zextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
; %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
; define amdgpu_kernel void @constant_zextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
; %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
; %ext = zext <64 x i8> %load to <64 x i16>
; store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
; ret void
; }
; XFUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i16:
; define amdgpu_kernel void @constant_sextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
; %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
; define amdgpu_kernel void @constant_sextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
; %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
; %ext = sext <64 x i8> %load to <64 x i16>
; store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
; ret void


@ -473,10 +473,10 @@ entry:
; GFX9-NEXT: s_setpc_b64
; VI: flat_load_ushort
define void @load_constant_hi_v2i16_reglo_vreg(i16 addrspace(2)* %in, i16 %reg) #0 {
define void @load_constant_hi_v2i16_reglo_vreg(i16 addrspace(4)* %in, i16 %reg) #0 {
entry:
%gep = getelementptr inbounds i16, i16 addrspace(2)* %in, i64 -2047
%load = load i16, i16 addrspace(2)* %gep
%gep = getelementptr inbounds i16, i16 addrspace(4)* %in, i64 -2047
%load = load i16, i16 addrspace(4)* %gep
%build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %load, i32 1
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
@ -492,10 +492,10 @@ entry:
; GFX9-NEXT: s_setpc_b64
; VI: flat_load_ushort
define void @load_constant_hi_v2f16_reglo_vreg(half addrspace(2)* %in, half %reg) #0 {
define void @load_constant_hi_v2f16_reglo_vreg(half addrspace(4)* %in, half %reg) #0 {
entry:
%gep = getelementptr inbounds half, half addrspace(2)* %in, i64 -2047
%load = load half, half addrspace(2)* %gep
%gep = getelementptr inbounds half, half addrspace(4)* %in, i64 -2047
%load = load half, half addrspace(4)* %gep
%build0 = insertelement <2 x half> undef, half %reg, i32 0
%build1 = insertelement <2 x half> %build0, half %load, i32 1
store <2 x half> %build1, <2 x half> addrspace(1)* undef
@ -625,11 +625,11 @@ entry:
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: v_mov_b32_e32 v0, v2
; GFX9-NEXT: s_setpc_b64
define <2 x i16> @load_constant_v2i16_split(i16 addrspace(2)* %in) #0 {
define <2 x i16> @load_constant_v2i16_split(i16 addrspace(4)* %in) #0 {
entry:
%gep = getelementptr inbounds i16, i16 addrspace(2)* %in, i64 1
%load0 = load volatile i16, i16 addrspace(2)* %in
%load1 = load volatile i16, i16 addrspace(2)* %gep
%gep = getelementptr inbounds i16, i16 addrspace(4)* %in, i64 1
%load0 = load volatile i16, i16 addrspace(4)* %in
%load1 = load volatile i16, i16 addrspace(4)* %gep
%build0 = insertelement <2 x i16> undef, i16 %load0, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %load1, i32 1
ret <2 x i16> %build1


@ -559,11 +559,11 @@ entry:
; GFX9-NEXT: s_setpc_b64
; VI: flat_load_ushort
define void @load_constant_lo_v2i16_reglo_vreg(i16 addrspace(2)* %in, i32 %reg) #0 {
define void @load_constant_lo_v2i16_reglo_vreg(i16 addrspace(4)* %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x i16>
%gep = getelementptr inbounds i16, i16 addrspace(2)* %in, i64 -2047
%load = load i16, i16 addrspace(2)* %gep
%gep = getelementptr inbounds i16, i16 addrspace(4)* %in, i64 -2047
%load = load i16, i16 addrspace(4)* %gep
%build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
ret void
@ -578,11 +578,11 @@ entry:
; GFX9-NEXT: s_setpc_b64
; VI: flat_load_ushort
define void @load_constant_lo_v2f16_reglo_vreg(half addrspace(2)* %in, i32 %reg) #0 {
define void @load_constant_lo_v2f16_reglo_vreg(half addrspace(4)* %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x half>
%gep = getelementptr inbounds half, half addrspace(2)* %in, i64 -2047
%load = load half, half addrspace(2)* %gep
%gep = getelementptr inbounds half, half addrspace(4)* %in, i64 -2047
%load = load half, half addrspace(4)* %gep
%build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
store <2 x half> %build1, <2 x half> addrspace(1)* undef
ret void


@ -5,17 +5,17 @@
declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
; GCN-LABEL: {{^}}get_global_id_0:
; GCN: s_and_b32 [[WGSIZEX:s[0-9]+]], {{s[0-9]+}}, 0xffff
; GCN: v_mov_b32_e32 [[VWGSIZEX:v[0-9]+]], [[WGSIZEX]]
; GCN: v_mad_u32_u24 v{{[0-9]+}}, s8, [[VWGSIZEX]], v0
define amdgpu_kernel void @get_global_id_0(i32 addrspace(1)* %out) #1 {
%dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
%cast.dispatch.ptr = bitcast i8 addrspace(2)* %dispatch.ptr to i32 addrspace(2)*
%gep = getelementptr inbounds i32, i32 addrspace(2)* %cast.dispatch.ptr, i64 1
%workgroup.size.xy = load i32, i32 addrspace(2)* %gep, align 4, !invariant.load !0
%dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
%cast.dispatch.ptr = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
%gep = getelementptr inbounds i32, i32 addrspace(4)* %cast.dispatch.ptr, i64 1
%workgroup.size.xy = load i32, i32 addrspace(4)* %gep, align 4, !invariant.load !0
%workgroup.size.x = and i32 %workgroup.size.xy, 65535
%workitem.id.x = call i32 @llvm.amdgcn.workitem.id.x(), !range !1


@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s
@ptr_load = addrspace(3) global i32 addrspace(2)* undef, align 8
@ptr_load = addrspace(3) global i32 addrspace(4)* undef, align 8
; Make sure that when the load from %ptr2 is folded, the chain isn't lost,
; which would result in losing the store to %gptr.
@ -16,11 +16,11 @@
; SI: buffer_store_dword
; SI: s_endpgm
define amdgpu_kernel void @missing_store_reduced(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
%ptr0 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(3)* @ptr_load, align 8
%ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 2
%ptr0 = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(3)* @ptr_load, align 8
%ptr2 = getelementptr inbounds i32, i32 addrspace(4)* %ptr0, i64 2
store i32 99, i32 addrspace(1)* %gptr, align 4
%tmp2 = load i32, i32 addrspace(2)* %ptr2, align 4
%tmp2 = load i32, i32 addrspace(4)* %ptr2, align 4
store i32 %tmp2, i32 addrspace(1)* %out, align 4
ret void


@ -5,40 +5,40 @@
; CHECK-LABEL: {{^}}test_none:
; CHECK: buffer_load_format_x v0, off, {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
define amdgpu_vs float @test_none(<4 x i32> addrspace(2)* inreg %base, i32 %i) {
define amdgpu_vs float @test_none(<4 x i32> addrspace(4)* inreg %base, i32 %i) {
main_body:
%ptr = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %base, i32 %i
%tmp2 = load <4 x i32>, <4 x i32> addrspace(2)* %ptr, align 32
%ptr = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %base, i32 %i
%tmp2 = load <4 x i32>, <4 x i32> addrspace(4)* %ptr, align 32
%tmp7 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %tmp2, i32 0, i32 0, i1 0, i1 0)
ret float %tmp7
}
; CHECK-LABEL: {{^}}test_idxen:
; CHECK: buffer_load_format_x v0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen{{$}}
define amdgpu_vs float @test_idxen(<4 x i32> addrspace(2)* inreg %base, i32 %i) {
define amdgpu_vs float @test_idxen(<4 x i32> addrspace(4)* inreg %base, i32 %i) {
main_body:
%ptr = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %base, i32 %i
%tmp2 = load <4 x i32>, <4 x i32> addrspace(2)* %ptr, align 32
%ptr = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %base, i32 %i
%tmp2 = load <4 x i32>, <4 x i32> addrspace(4)* %ptr, align 32
%tmp7 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %tmp2, i32 undef, i32 0, i1 0, i1 0)
ret float %tmp7
}
; CHECK-LABEL: {{^}}test_offen:
; CHECK: buffer_load_format_x v0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}
define amdgpu_vs float @test_offen(<4 x i32> addrspace(2)* inreg %base, i32 %i) {
define amdgpu_vs float @test_offen(<4 x i32> addrspace(4)* inreg %base, i32 %i) {
main_body:
%ptr = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %base, i32 %i
%tmp2 = load <4 x i32>, <4 x i32> addrspace(2)* %ptr, align 32
%ptr = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %base, i32 %i
%tmp2 = load <4 x i32>, <4 x i32> addrspace(4)* %ptr, align 32
%tmp7 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %tmp2, i32 0, i32 undef, i1 0, i1 0)
ret float %tmp7
}
; CHECK-LABEL: {{^}}test_both:
; CHECK: buffer_load_format_x v0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen offen{{$}}
define amdgpu_vs float @test_both(<4 x i32> addrspace(2)* inreg %base, i32 %i) {
define amdgpu_vs float @test_both(<4 x i32> addrspace(4)* inreg %base, i32 %i) {
main_body:
%ptr = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %base, i32 %i
%tmp2 = load <4 x i32>, <4 x i32> addrspace(2)* %ptr, align 32
%ptr = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %base, i32 %i
%tmp2 = load <4 x i32>, <4 x i32> addrspace(4)* %ptr, align 32
%tmp7 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %tmp2, i32 undef, i32 undef, i1 0, i1 0)
ret float %tmp7
}


@ -55,10 +55,10 @@ entry:
; CHECK-LABEL: {{^}}soffset_max_imm:
; CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 64 offen glc
define amdgpu_gs void @soffset_max_imm([6 x <4 x i32>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) {
define amdgpu_gs void @soffset_max_imm([6 x <4 x i32>] addrspace(4)* byval, [17 x <4 x i32>] addrspace(4)* byval, [16 x <4 x i32>] addrspace(4)* byval, [32 x <8 x i32>] addrspace(4)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) {
main_body:
%tmp0 = getelementptr [6 x <4 x i32>], [6 x <4 x i32>] addrspace(2)* %0, i32 0, i32 0
%tmp1 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp0
%tmp0 = getelementptr [6 x <4 x i32>], [6 x <4 x i32>] addrspace(4)* %0, i32 0, i32 0
%tmp1 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp0
%tmp2 = shl i32 %6, 2
%tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32> %tmp1, i32 %tmp2, i32 64, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
%tmp4 = add i32 %6, 16
@ -74,10 +74,10 @@ main_body:
; CHECK-LABEL: {{^}}soffset_no_fold:
; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x41
; CHECK: buffer_load_dword v{{[0-9+]}}, v{{[0-9+]}}, s[{{[0-9]+}}:{{[0-9]+}}], [[SOFFSET]] offen glc
define amdgpu_gs void @soffset_no_fold([6 x <4 x i32>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) {
define amdgpu_gs void @soffset_no_fold([6 x <4 x i32>] addrspace(4)* byval, [17 x <4 x i32>] addrspace(4)* byval, [16 x <4 x i32>] addrspace(4)* byval, [32 x <8 x i32>] addrspace(4)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) {
main_body:
%tmp0 = getelementptr [6 x <4 x i32>], [6 x <4 x i32>] addrspace(2)* %0, i32 0, i32 0
%tmp1 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp0
%tmp0 = getelementptr [6 x <4 x i32>], [6 x <4 x i32>] addrspace(4)* %0, i32 0, i32 0
%tmp1 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp0
%tmp2 = shl i32 %6, 2
%tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32> %tmp1, i32 %tmp2, i32 65, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
%tmp4 = add i32 %6, 16


@ -642,12 +642,12 @@ uniform.multi.exit.region:
br i1 %uniform.cond0, label %uniform.if, label %uniform.ret1
uniform.if:
%sgpr0 = load volatile i32, i32 addrspace(2)* undef
%sgpr0 = load volatile i32, i32 addrspace(4)* undef
%uniform.cond1 = icmp slt i32 %sgpr0, 1
br i1 %uniform.cond1, label %uniform.then, label %uniform.endif
uniform.then:
%sgpr1 = load volatile i32, i32 addrspace(2)* undef
%sgpr1 = load volatile i32, i32 addrspace(4)* undef
%uniform.cond2 = icmp sge i32 %sgpr1, 4
store volatile i32 33, i32 addrspace(1)* undef
br i1 %uniform.cond2, label %uniform.ret0, label %uniform.endif


@ -6,21 +6,21 @@
; EG: R_AMDGPU_ABS32 extern_const_addrspace
; CHECK-DAG: Name: extern_const_addrspace
@extern_const_addrspace = external unnamed_addr addrspace(2) constant [5 x i32], align 4
@extern_const_addrspace = external unnamed_addr addrspace(4) constant [5 x i32], align 4
; CHECK-DAG: Name: load_extern_const_init
define amdgpu_kernel void @load_extern_const_init(i32 addrspace(1)* %out) nounwind {
%val = load i32, i32 addrspace(2)* getelementptr ([5 x i32], [5 x i32] addrspace(2)* @extern_const_addrspace, i64 0, i64 3), align 4
%val = load i32, i32 addrspace(4)* getelementptr ([5 x i32], [5 x i32] addrspace(4)* @extern_const_addrspace, i64 0, i64 3), align 4
store i32 %val, i32 addrspace(1)* %out, align 4
ret void
}
; CHECK-DAG: Name: undef_const_addrspace
@undef_const_addrspace = unnamed_addr addrspace(2) constant [5 x i32] undef, align 4
@undef_const_addrspace = unnamed_addr addrspace(4) constant [5 x i32] undef, align 4
; CHECK-DAG: Name: undef_const_addrspace
define amdgpu_kernel void @load_undef_const_init(i32 addrspace(1)* %out) nounwind {
%val = load i32, i32 addrspace(2)* getelementptr ([5 x i32], [5 x i32] addrspace(2)* @undef_const_addrspace, i64 0, i64 3), align 4
%val = load i32, i32 addrspace(4)* getelementptr ([5 x i32], [5 x i32] addrspace(4)* @undef_const_addrspace, i64 0, i64 3), align 4
store i32 %val, i32 addrspace(1)* %out, align 4
ret void
}


@ -194,9 +194,9 @@ define amdgpu_kernel void @truncate_buffer_load_i64_to_i8(i8 addrspace(1)* %out,
; SI: s_load_dword [[LOAD:s[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0x0
; SI: s_waitcnt lgkmcnt(0)
; SI: s_and_b32 s{{[0-9]+}}, [[LOAD]], 0xffff
define amdgpu_kernel void @smrd_mask_i32_to_i16(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
define amdgpu_kernel void @smrd_mask_i32_to_i16(i32 addrspace(1)* %out, i32 addrspace(4)* %in) {
entry:
%val = load i32, i32 addrspace(2)* %in
%val = load i32, i32 addrspace(4)* %in
%mask = and i32 %val, 65535
store i32 %mask, i32 addrspace(1)* %out
ret void


@ -1,7 +1,7 @@
;RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs | FileCheck -check-prefixes=CHECK,GCN %s
;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -verify-machineinstrs | FileCheck -check-prefixes=CHECK,R600 %s
%struct.S = type { i32 addrspace(5)*, i32 addrspace(1)*, i32 addrspace(2)*, i32 addrspace(3)*, i32*, i32 addrspace(4)*}
%struct.S = type { i32 addrspace(5)*, i32 addrspace(1)*, i32 addrspace(4)*, i32 addrspace(3)*, i32*, i32 addrspace(2)*}
; CHECK-LABEL: nullptr_priv:
; CHECK-NEXT: .long 0
@ -15,7 +15,7 @@
; CHECK-LABEL: nullptr_const:
; GCN-NEXT: .quad 0
; R600-NEXT: .long 0
@nullptr_const = global i32 addrspace(2)* addrspacecast (i32* null to i32 addrspace(2)*)
@nullptr_const = global i32 addrspace(4)* addrspacecast (i32* null to i32 addrspace(4)*)
; CHECK-LABEL: nullptr_local:
; CHECK-NEXT: .long -1
@ -23,7 +23,7 @@
; CHECK-LABEL: nullptr_region:
; CHECK-NEXT: .long -1
@nullptr_region = global i32 addrspace(4)* addrspacecast (i32* null to i32 addrspace(4)*)
@nullptr_region = global i32 addrspace(2)* addrspacecast (i32* null to i32 addrspace(2)*)
; CHECK-LABEL: nullptr6:
; R600-NEXT: .long 0
@ -113,7 +113,7 @@
@structWithPointers = addrspace(1) global %struct.S {
i32 addrspace(5)* addrspacecast (i32* null to i32 addrspace(5)*),
i32 addrspace(1)* addrspacecast (i32* null to i32 addrspace(1)*),
i32 addrspace(2)* addrspacecast (i32* null to i32 addrspace(2)*),
i32 addrspace(4)* addrspacecast (i32* null to i32 addrspace(4)*),
i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*),
i32* null,
i32 addrspace(4)* addrspacecast (i32* null to i32 addrspace(4)*)}, align 4
i32 addrspace(2)* addrspacecast (i32* null to i32 addrspace(2)*)}, align 4


@ -8,9 +8,9 @@
; GFX9: s_load_dword [[VAL1:s[0-9]+]]
; GFX9: s_pack_ll_b32_b16 [[PACKED:s[0-9]+]], [[VAL0]], [[VAL1]]
; GFX9: ; use [[PACKED]]
define amdgpu_kernel void @s_pack_v2f16(i32 addrspace(2)* %in0, i32 addrspace(2)* %in1) #0 {
%val0 = load volatile i32, i32 addrspace(2)* %in0
%val1 = load volatile i32, i32 addrspace(2)* %in1
define amdgpu_kernel void @s_pack_v2f16(i32 addrspace(4)* %in0, i32 addrspace(4)* %in1) #0 {
%val0 = load volatile i32, i32 addrspace(4)* %in0
%val1 = load volatile i32, i32 addrspace(4)* %in1
%lo.i = trunc i32 %val0 to i16
%hi.i = trunc i32 %val1 to i16
%lo = bitcast i16 %lo.i to half
@ -27,8 +27,8 @@ define amdgpu_kernel void @s_pack_v2f16(i32 addrspace(2)* %in0, i32 addrspace(2)
; GFX9: s_load_dword [[VAL1:s[0-9]+]]
; GFX9: s_pack_ll_b32_b16 [[PACKED:s[0-9]+]], 0x1234, [[VAL1]]
; GFX9: ; use [[PACKED]]
define amdgpu_kernel void @s_pack_v2f16_imm_lo(i32 addrspace(2)* %in1) #0 {
%val1 = load i32, i32 addrspace(2)* %in1
define amdgpu_kernel void @s_pack_v2f16_imm_lo(i32 addrspace(4)* %in1) #0 {
%val1 = load i32, i32 addrspace(4)* %in1
%hi.i = trunc i32 %val1 to i16
%hi = bitcast i16 %hi.i to half
%vec.0 = insertelement <2 x half> undef, half 0xH1234, i32 0
@ -43,8 +43,8 @@ define amdgpu_kernel void @s_pack_v2f16_imm_lo(i32 addrspace(2)* %in1) #0 {
; GFX9: s_load_dword [[VAL0:s[0-9]+]]
; GFX9: s_pack_ll_b32_b16 [[PACKED:s[0-9]+]], [[VAL0]], 0x1234
; GFX9: ; use [[PACKED]]
define amdgpu_kernel void @s_pack_v2f16_imm_hi(i32 addrspace(2)* %in0) #0 {
%val0 = load i32, i32 addrspace(2)* %in0
define amdgpu_kernel void @s_pack_v2f16_imm_hi(i32 addrspace(4)* %in0) #0 {
%val0 = load i32, i32 addrspace(4)* %in0
%lo.i = trunc i32 %val0 to i16
%lo = bitcast i16 %lo.i to half
%vec.0 = insertelement <2 x half> undef, half %lo, i32 0


@ -8,9 +8,9 @@
; GFX9: s_load_dword [[VAL1:s[0-9]+]]
; GFX9: s_pack_ll_b32_b16 [[PACKED:s[0-9]+]], [[VAL0]], [[VAL1]]
; GFX9: ; use [[PACKED]]
define amdgpu_kernel void @s_pack_v2i16(i32 addrspace(2)* %in0, i32 addrspace(2)* %in1) #0 {
%val0 = load volatile i32, i32 addrspace(2)* %in0
%val1 = load volatile i32, i32 addrspace(2)* %in1
define amdgpu_kernel void @s_pack_v2i16(i32 addrspace(4)* %in0, i32 addrspace(4)* %in1) #0 {
%val0 = load volatile i32, i32 addrspace(4)* %in0
%val1 = load volatile i32, i32 addrspace(4)* %in1
%lo = trunc i32 %val0 to i16
%hi = trunc i32 %val1 to i16
%vec.0 = insertelement <2 x i16> undef, i16 %lo, i32 0
@ -25,8 +25,8 @@ define amdgpu_kernel void @s_pack_v2i16(i32 addrspace(2)* %in0, i32 addrspace(2)
; GFX9: s_load_dword [[VAL1:s[0-9]+]]
; GFX9: s_pack_ll_b32_b16 [[PACKED:s[0-9]+]], 0x1c8, [[VAL1]]
; GFX9: ; use [[PACKED]]
define amdgpu_kernel void @s_pack_v2i16_imm_lo(i32 addrspace(2)* %in1) #0 {
%val1 = load i32, i32 addrspace(2)* %in1
define amdgpu_kernel void @s_pack_v2i16_imm_lo(i32 addrspace(4)* %in1) #0 {
%val1 = load i32, i32 addrspace(4)* %in1
%hi = trunc i32 %val1 to i16
%vec.0 = insertelement <2 x i16> undef, i16 456, i32 0
%vec.1 = insertelement <2 x i16> %vec.0, i16 %hi, i32 1
@ -40,8 +40,8 @@ define amdgpu_kernel void @s_pack_v2i16_imm_lo(i32 addrspace(2)* %in1) #0 {
; GFX9: s_load_dword [[VAL0:s[0-9]+]]
; GFX9: s_pack_ll_b32_b16 [[PACKED:s[0-9]+]], [[VAL0]], 0x1c8
; GFX9: ; use [[PACKED]]
define amdgpu_kernel void @s_pack_v2i16_imm_hi(i32 addrspace(2)* %in0) #0 {
%val0 = load i32, i32 addrspace(2)* %in0
define amdgpu_kernel void @s_pack_v2i16_imm_hi(i32 addrspace(4)* %in0) #0 {
%val0 = load i32, i32 addrspace(4)* %in0
%lo = trunc i32 %val0 to i16
%vec.0 = insertelement <2 x i16> undef, i16 %lo, i32 0
%vec.1 = insertelement <2 x i16> %vec.0, i16 456, i32 1


@ -1,6 +1,6 @@
; RUN: llc -filetype=obj -march=r600 -mcpu=cypress -verify-machineinstrs < %s | llvm-readobj -relocations -symbols | FileCheck %s
@arr = internal unnamed_addr addrspace(2) constant [4 x i32] [i32 4, i32 5, i32 6, i32 7], align 4
@arr = internal unnamed_addr addrspace(4) constant [4 x i32] [i32 4, i32 5, i32 6, i32 7], align 4
; CHECK: Relocations [
; CHECK: Section (3) .rel.text {
@ -19,8 +19,8 @@
; CHECK: }
define amdgpu_kernel void @test_constant_array_fixup(i32 addrspace(1)* nocapture %out, i32 %idx) #0 {
entry:
%arrayidx = getelementptr inbounds [4 x i32], [4 x i32] addrspace(2)* @arr, i32 0, i32 %idx
%val = load i32, i32 addrspace(2)* %arrayidx
%arrayidx = getelementptr inbounds [4 x i32], [4 x i32] addrspace(4)* @arr, i32 0, i32 %idx
%val = load i32, i32 addrspace(4)* %arrayidx
store i32 %val, i32 addrspace(1)* %out, align 4
ret void
}


@ -28,9 +28,9 @@ define amdgpu_kernel void @test_readcyclecounter(i64 addrspace(1)* %out) #0 {
; SI-DAG: s_memtime
; VI-DAG: s_memrealtime
; GCN-DAG: s_load_dword
define amdgpu_cs i32 @test_readcyclecounter_smem(i64 addrspace(2)* inreg %in) #0 {
define amdgpu_cs i32 @test_readcyclecounter_smem(i64 addrspace(4)* inreg %in) #0 {
%cycle0 = call i64 @llvm.readcyclecounter()
%in.v = load i64, i64 addrspace(2)* %in
%in.v = load i64, i64 addrspace(4)* %in
%r.64 = add i64 %cycle0, %in.v
%r.32 = trunc i64 %r.64 to i32
ret i32 %r.32


@ -7,7 +7,7 @@
; GCN: s_waitcnt expcnt(0)
; GCN: v_add_f32_e32 v0, 1.0, v0
; GCN-NOT: s_endpgm
define amdgpu_vs { float, float } @vgpr([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
define amdgpu_vs { float, float } @vgpr([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
%x = fadd float %arg3, 1.000000e+00
@ -26,7 +26,7 @@ bb:
; GCN-DAG: v_mov_b32_e32 v3, -1.0
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
define amdgpu_vs { float, float, float, float } @vgpr_literal([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
define amdgpu_vs { float, float, float, float } @vgpr_literal([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
ret { float, float, float, float } { float 1.000000e+00, float 2.000000e+00, float 4.000000e+00, float -1.000000e+00 }
@ -43,7 +43,7 @@ bb:
; GCN: v_mov_b32_e32 v3, v4
; GCN: v_mov_b32_e32 v4, v6
; GCN-NOT: s_endpgm
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr0([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr0([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
bb:
%i0 = extractelement <2 x i32> %arg4, i32 0
%i1 = extractelement <2 x i32> %arg4, i32 1
@ -68,7 +68,7 @@ bb:
; GCN-LABEL: {{^}}ps_input_ena_no_inputs:
; GCN: v_mov_b32_e32 v0, 1.0
; GCN-NOT: s_endpgm
define amdgpu_ps float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
define amdgpu_ps float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
bb:
ret float 1.000000e+00
}
@ -82,7 +82,7 @@ bb:
; GCN-DAG: v_mov_b32_e32 v1, v2
; GCN: v_mov_b32_e32 v2, v3
; GCN-NOT: s_endpgm
define amdgpu_ps { float, <2 x float> } @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
define amdgpu_ps { float, <2 x float> } @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
bb:
%f = bitcast <2 x i32> %arg8 to <2 x float>
%s = insertvalue { float, <2 x float> } undef, float %arg14, 0
@ -101,7 +101,7 @@ bb:
; GCN-DAG: v_mov_b32_e32 v3, v6
; GCN-DAG: v_mov_b32_e32 v4, v8
; GCN-NOT: s_endpgm
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr1([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #2 {
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr1([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #2 {
bb:
%i0 = extractelement <2 x i32> %arg4, i32 0
%i1 = extractelement <2 x i32> %arg4, i32 1
@ -130,7 +130,7 @@ bb:
; GCN: v_mov_b32_e32 v3, v8
; GCN: v_mov_b32_e32 v4, v12
; GCN-NOT: s_endpgm
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr119([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #3 {
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr119([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #3 {
bb:
%i0 = extractelement <2 x i32> %arg4, i32 0
%i1 = extractelement <2 x i32> %arg4, i32 1
@ -159,7 +159,7 @@ bb:
; GCN: v_mov_b32_e32 v3, v4
; GCN: v_mov_b32_e32 v4, v8
; GCN-NOT: s_endpgm
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr418([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #4 {
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr418([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #4 {
bb:
%i0 = extractelement <2 x i32> %arg4, i32 0
%i1 = extractelement <2 x i32> %arg4, i32 1
@ -181,7 +181,7 @@ bb:
; GCN: s_add_i32 s0, s3, 2
; GCN: s_mov_b32 s2, s3
; GCN-NOT: s_endpgm
define amdgpu_vs { i32, i32, i32 } @sgpr([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
define amdgpu_vs { i32, i32, i32 } @sgpr([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
%x = add i32 %arg2, 2
%a = insertvalue { i32, i32, i32 } undef, i32 %x, 0
@ -197,7 +197,7 @@ bb:
; GCN-DAG: s_mov_b32 s2, 7
; GCN-DAG: s_mov_b32 s3, 8
; GCN-NOT: s_endpgm
define amdgpu_vs { i32, i32, i32, i32 } @sgpr_literal([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
define amdgpu_vs { i32, i32, i32, i32 } @sgpr_literal([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
%x = add i32 %arg2, 2
ret { i32, i32, i32, i32 } { i32 5, i32 6, i32 7, i32 8 }
@ -212,7 +212,7 @@ bb:
; GCN-DAG: s_add_i32 s0, s3, 2
; GCN-DAG: s_mov_b32 s2, s3
; GCN-NOT: s_endpgm
define amdgpu_vs { float, i32, float, i32, i32 } @both([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
define amdgpu_vs { float, i32, float, i32, i32 } @both([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
%v = fadd float %arg3, 1.000000e+00
@ -235,7 +235,7 @@ bb:
; GCN-DAG: v_mov_b32_e32 v1, 2.0
; GCN-DAG: v_mov_b32_e32 v2, 4.0
; GCN: s_waitcnt expcnt(0)
define amdgpu_vs { { float, i32 }, { i32, <2 x float> } } @structure_literal([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
define amdgpu_vs { { float, i32 }, { i32, <2 x float> } } @structure_literal([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
ret { { float, i32 }, { i32, <2 x float> } } { { float, i32 } { float 1.000000e+00, i32 2 }, { i32, <2 x float> } { i32 3, <2 x float> <float 2.000000e+00, float 4.000000e+00> } }

View File

@ -65,24 +65,24 @@ done: ; preds = %loop
; GCN: v_mov_b32_e32 [[V_OUT:v[0-9]+]], [[OUT]]
; GCN-NOHSA: buffer_store_dword [[V_OUT]]
; GCN-HSA: flat_store_dword {{.*}}, [[V_OUT]]
define amdgpu_kernel void @smrd_valu(i32 addrspace(2)* addrspace(1)* %in, i32 %a, i32 %b, i32 addrspace(1)* %out) #1 {
define amdgpu_kernel void @smrd_valu(i32 addrspace(4)* addrspace(1)* %in, i32 %a, i32 %b, i32 addrspace(1)* %out) #1 {
entry:
%tmp = icmp ne i32 %a, 0
br i1 %tmp, label %if, label %else
if: ; preds = %entry
%tmp1 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %in
%tmp1 = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(1)* %in
br label %endif
else: ; preds = %entry
%tmp2 = getelementptr i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %in
%tmp3 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %tmp2
%tmp2 = getelementptr i32 addrspace(4)*, i32 addrspace(4)* addrspace(1)* %in
%tmp3 = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(1)* %tmp2
br label %endif
endif: ; preds = %else, %if
%tmp4 = phi i32 addrspace(2)* [ %tmp1, %if ], [ %tmp3, %else ]
%tmp5 = getelementptr i32, i32 addrspace(2)* %tmp4, i32 3000
%tmp6 = load i32, i32 addrspace(2)* %tmp5
%tmp4 = phi i32 addrspace(4)* [ %tmp1, %if ], [ %tmp3, %else ]
%tmp5 = getelementptr i32, i32 addrspace(4)* %tmp4, i32 3000
%tmp6 = load i32, i32 addrspace(4)* %tmp5
store i32 %tmp6, i32 addrspace(1)* %out
ret void
}
@ -93,12 +93,12 @@ endif: ; preds = %else, %if
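; The offset below depends on the workitem id, so the address is divergent;
; the scalar (SMRD) load has to be rewritten to use the vector memory path
; instead (MUBUF with addr64 on non-HSA targets, FLAT on HSA targets).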
; GCN-NOHSA-NOT: v_add
; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16{{$}}
; GCN-HSA: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
define amdgpu_kernel void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in) #1 {
define amdgpu_kernel void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(4)* %in) #1 {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = add i32 %tmp, 4
%tmp2 = getelementptr [8 x i32], [8 x i32] addrspace(2)* %in, i32 %tmp, i32 4
%tmp3 = load i32, i32 addrspace(2)* %tmp2
%tmp2 = getelementptr [8 x i32], [8 x i32] addrspace(4)* %in, i32 %tmp, i32 4
%tmp3 = load i32, i32 addrspace(4)* %tmp2
store i32 %tmp3, i32 addrspace(1)* %out
ret void
}
@ -113,12 +113,12 @@ entry:
; GCN-NOHSA: buffer_store_dword
; GCN-HSA: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
; GCN-HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}
define amdgpu_kernel void @smrd_valu_ci_offset(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %c) #1 {
define amdgpu_kernel void @smrd_valu_ci_offset(i32 addrspace(1)* %out, i32 addrspace(4)* %in, i32 %c) #1 {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp2 = getelementptr i32, i32 addrspace(2)* %in, i32 %tmp
%tmp3 = getelementptr i32, i32 addrspace(2)* %tmp2, i32 5000
%tmp4 = load i32, i32 addrspace(2)* %tmp3
%tmp2 = getelementptr i32, i32 addrspace(4)* %in, i32 %tmp
%tmp3 = getelementptr i32, i32 addrspace(4)* %tmp2, i32 5000
%tmp4 = load i32, i32 addrspace(4)* %tmp3
%tmp5 = add i32 %tmp4, %c
store i32 %tmp5, i32 addrspace(1)* %out
ret void
@ -133,12 +133,12 @@ entry:
; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN-NOHSA: buffer_store_dwordx2
; GCN-HSA: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
define amdgpu_kernel void @smrd_valu_ci_offset_x2(i64 addrspace(1)* %out, i64 addrspace(2)* %in, i64 %c) #1 {
define amdgpu_kernel void @smrd_valu_ci_offset_x2(i64 addrspace(1)* %out, i64 addrspace(4)* %in, i64 %c) #1 {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp2 = getelementptr i64, i64 addrspace(2)* %in, i32 %tmp
%tmp3 = getelementptr i64, i64 addrspace(2)* %tmp2, i32 5000
%tmp4 = load i64, i64 addrspace(2)* %tmp3
%tmp2 = getelementptr i64, i64 addrspace(4)* %in, i32 %tmp
%tmp3 = getelementptr i64, i64 addrspace(4)* %tmp2, i32 5000
%tmp4 = load i64, i64 addrspace(4)* %tmp3
%tmp5 = or i64 %tmp4, %c
store i64 %tmp5, i64 addrspace(1)* %out
ret void
@ -155,12 +155,12 @@ entry:
; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN-NOHSA: buffer_store_dwordx4
; GCN-HSA: flat_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
define amdgpu_kernel void @smrd_valu_ci_offset_x4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(2)* %in, <4 x i32> %c) #1 {
define amdgpu_kernel void @smrd_valu_ci_offset_x4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(4)* %in, <4 x i32> %c) #1 {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp2 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %in, i32 %tmp
%tmp3 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %tmp2, i32 1234
%tmp4 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp3
%tmp2 = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %in, i32 %tmp
%tmp3 = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %tmp2, i32 1234
%tmp4 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp3
%tmp5 = or <4 x i32> %tmp4, %c
store <4 x i32> %tmp5, <4 x i32> addrspace(1)* %out
ret void
@ -189,12 +189,12 @@ entry:
; GCN-NOHSA: buffer_store_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
define amdgpu_kernel void @smrd_valu_ci_offset_x8(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(2)* %in, <8 x i32> %c) #1 {
define amdgpu_kernel void @smrd_valu_ci_offset_x8(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(4)* %in, <8 x i32> %c) #1 {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp2 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %in, i32 %tmp
%tmp3 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %tmp2, i32 1234
%tmp4 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp3
%tmp2 = getelementptr <8 x i32>, <8 x i32> addrspace(4)* %in, i32 %tmp
%tmp3 = getelementptr <8 x i32>, <8 x i32> addrspace(4)* %tmp2, i32 1234
%tmp4 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp3
%tmp5 = or <8 x i32> %tmp4, %c
store <8 x i32> %tmp5, <8 x i32> addrspace(1)* %out
ret void
@ -230,12 +230,12 @@ entry:
; GCN-HSA: flat_load_dwordx4
; GCN: s_endpgm
define amdgpu_kernel void @smrd_valu_ci_offset_x16(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(2)* %in, <16 x i32> %c) #1 {
define amdgpu_kernel void @smrd_valu_ci_offset_x16(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(4)* %in, <16 x i32> %c) #1 {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp2 = getelementptr <16 x i32>, <16 x i32> addrspace(2)* %in, i32 %tmp
%tmp3 = getelementptr <16 x i32>, <16 x i32> addrspace(2)* %tmp2, i32 1234
%tmp4 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp3
%tmp2 = getelementptr <16 x i32>, <16 x i32> addrspace(4)* %in, i32 %tmp
%tmp3 = getelementptr <16 x i32>, <16 x i32> addrspace(4)* %tmp2, i32 1234
%tmp4 = load <16 x i32>, <16 x i32> addrspace(4)* %tmp3
%tmp5 = or <16 x i32> %tmp4, %c
store <16 x i32> %tmp5, <16 x i32> addrspace(1)* %out
ret void
@ -247,12 +247,12 @@ entry:
; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, s{{[0-9]+}}, [[MOVED]]
; GCN-NOHSA: buffer_store_dword [[ADD]]
; GCN-HSA: flat_store_dword {{.*}}, [[ADD]]
define amdgpu_kernel void @smrd_valu2_salu_user(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in, i32 %a) #1 {
define amdgpu_kernel void @smrd_valu2_salu_user(i32 addrspace(1)* %out, [8 x i32] addrspace(4)* %in, i32 %a) #1 {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = add i32 %tmp, 4
%tmp2 = getelementptr [8 x i32], [8 x i32] addrspace(2)* %in, i32 %tmp, i32 4
%tmp3 = load i32, i32 addrspace(2)* %tmp2
%tmp2 = getelementptr [8 x i32], [8 x i32] addrspace(4)* %in, i32 %tmp, i32 4
%tmp3 = load i32, i32 addrspace(4)* %tmp2
%tmp4 = add i32 %tmp3, %a
store i32 %tmp4, i32 addrspace(1)* %out
ret void
@ -261,12 +261,12 @@ entry:
; GCN-LABEL: {{^}}smrd_valu2_max_smrd_offset:
; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1020{{$}}
; GCN-HSA: flat_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}]
define amdgpu_kernel void @smrd_valu2_max_smrd_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 {
define amdgpu_kernel void @smrd_valu2_max_smrd_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(4)* %in) #1 {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = add i32 %tmp, 4
%tmp2 = getelementptr [1024 x i32], [1024 x i32] addrspace(2)* %in, i32 %tmp, i32 255
%tmp3 = load i32, i32 addrspace(2)* %tmp2
%tmp2 = getelementptr [1024 x i32], [1024 x i32] addrspace(4)* %in, i32 %tmp, i32 255
%tmp3 = load i32, i32 addrspace(4)* %tmp2
store i32 %tmp3, i32 addrspace(1)* %out
ret void
}
@ -275,12 +275,12 @@ entry:
; GCN-NOHSA-NOT: v_add
; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1024{{$}}
; GCN-HSA: flat_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}]
define amdgpu_kernel void @smrd_valu2_mubuf_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 {
define amdgpu_kernel void @smrd_valu2_mubuf_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(4)* %in) #1 {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = add i32 %tmp, 4
%tmp2 = getelementptr [1024 x i32], [1024 x i32] addrspace(2)* %in, i32 %tmp, i32 256
%tmp3 = load i32, i32 addrspace(2)* %tmp2
%tmp2 = getelementptr [1024 x i32], [1024 x i32] addrspace(4)* %in, i32 %tmp, i32 256
%tmp3 = load i32, i32 addrspace(4)* %tmp2
store i32 %tmp3, i32 addrspace(1)* %out
ret void
}
@ -290,12 +290,12 @@ entry:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
define amdgpu_kernel void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
define amdgpu_kernel void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(4)* nocapture readonly %in) #1 {
entry:
%tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
%tmp2 = bitcast i32 addrspace(2)* %tmp1 to <8 x i32> addrspace(2)*
%tmp3 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp2, align 4
%tmp1 = getelementptr inbounds i32, i32 addrspace(4)* %in, i32 %tmp0
%tmp2 = bitcast i32 addrspace(4)* %tmp1 to <8 x i32> addrspace(4)*
%tmp3 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp2, align 4
store <8 x i32> %tmp3, <8 x i32> addrspace(1)* %out, align 32
ret void
}
@ -313,12 +313,12 @@ entry:
; GCN-NOHSA: buffer_store_dword
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
define amdgpu_kernel void @s_load_imm_v8i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
define amdgpu_kernel void @s_load_imm_v8i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(4)* nocapture readonly %in) #1 {
entry:
%tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
%tmp2 = bitcast i32 addrspace(2)* %tmp1 to <8 x i32> addrspace(2)*
%tmp3 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp2, align 4
%tmp1 = getelementptr inbounds i32, i32 addrspace(4)* %in, i32 %tmp0
%tmp2 = bitcast i32 addrspace(4)* %tmp1 to <8 x i32> addrspace(4)*
%tmp3 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp2, align 4
%elt0 = extractelement <8 x i32> %tmp3, i32 0
%elt1 = extractelement <8 x i32> %tmp3, i32 1
@ -350,12 +350,12 @@ entry:
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
define amdgpu_kernel void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
define amdgpu_kernel void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(4)* nocapture readonly %in) #1 {
entry:
%tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
%tmp2 = bitcast i32 addrspace(2)* %tmp1 to <16 x i32> addrspace(2)*
%tmp3 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp2, align 4
%tmp1 = getelementptr inbounds i32, i32 addrspace(4)* %in, i32 %tmp0
%tmp2 = bitcast i32 addrspace(4)* %tmp1 to <16 x i32> addrspace(4)*
%tmp3 = load <16 x i32>, <16 x i32> addrspace(4)* %tmp2, align 4
store <16 x i32> %tmp3, <16 x i32> addrspace(1)* %out, align 32
ret void
}
@ -385,12 +385,12 @@ entry:
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
define amdgpu_kernel void @s_load_imm_v16i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
define amdgpu_kernel void @s_load_imm_v16i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(4)* nocapture readonly %in) #1 {
entry:
%tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
%tmp2 = bitcast i32 addrspace(2)* %tmp1 to <16 x i32> addrspace(2)*
%tmp3 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp2, align 4
%tmp1 = getelementptr inbounds i32, i32 addrspace(4)* %in, i32 %tmp0
%tmp2 = bitcast i32 addrspace(4)* %tmp1 to <16 x i32> addrspace(4)*
%tmp3 = load <16 x i32>, <16 x i32> addrspace(4)* %tmp2, align 4
%elt0 = extractelement <16 x i32> %tmp3, i32 0
%elt1 = extractelement <16 x i32> %tmp3, i32 1

View File

@ -15,11 +15,11 @@
bb:
%0 = getelementptr i32, i32 addrspace(1)* %arg1, i64 0, !amdgpu.uniform !3, !amdgpu.noclobber !3
%tmp5 = alloca %struct.wombat, align 16, addrspace(5)
%1 = call noalias nonnull dereferenceable(64) i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
%2 = bitcast i8 addrspace(2)* %1 to i32 addrspace(2)*
%3 = getelementptr inbounds i32, i32 addrspace(2)* %2, i64 1
%4 = bitcast i32 addrspace(2)* %3 to <2 x i32> addrspace(2)*, !amdgpu.uniform !3, !amdgpu.noclobber !3
%5 = load <2 x i32>, <2 x i32> addrspace(2)* %4, align 4, !invariant.load !3
%1 = call noalias nonnull dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
%2 = bitcast i8 addrspace(4)* %1 to i32 addrspace(4)*
%3 = getelementptr inbounds i32, i32 addrspace(4)* %2, i64 1
%4 = bitcast i32 addrspace(4)* %3 to <2 x i32> addrspace(4)*, !amdgpu.uniform !3, !amdgpu.noclobber !3
%5 = load <2 x i32>, <2 x i32> addrspace(4)* %4, align 4, !invariant.load !3
%6 = extractelement <2 x i32> %5, i32 0
%7 = extractelement <2 x i32> %5, i32 1
%8 = lshr i32 %6, 16
@ -32,7 +32,7 @@
%15 = add i32 %13, %14
%16 = add i32 %15, %11
%17 = getelementptr inbounds [256 x [16 x i8]], [256 x [16 x i8]] addrspace(3)* @sched_dbg_value_crash.tmp6, i32 0, i32 %16
%tmp7 = load i64, i64 addrspace(2)* null, align 536870912
%tmp7 = load i64, i64 addrspace(4)* null, align 536870912
%tmp8 = tail call i32 @llvm.amdgcn.workitem.id.x() #3, !range !4
%tmp9 = zext i32 %tmp8 to i64
%tmp10 = add i64 %tmp7, %tmp9
@ -141,7 +141,7 @@
declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #1
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #1
declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #1
declare i32 @llvm.amdgcn.workitem.id.y() #1
declare i32 @llvm.amdgcn.workitem.id.z() #1
declare void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)* nocapture writeonly, i8 addrspace(5)* nocapture readonly, i64, i32, i1) #0
@ -199,9 +199,9 @@ body: |
%2:vgpr_32 = COPY $vgpr2
%1:vgpr_32 = COPY $vgpr1
%0:vgpr_32 = COPY $vgpr0
%5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
%6:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 8, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
%7:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 16, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
%5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%6:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 8, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%7:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 16, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%8:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 24, 0
%9:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 32, 0
%10:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 4, 0

View File

@ -528,8 +528,8 @@ define amdgpu_kernel void @v_sext_in_reg_i32_to_i64_move_use(i64 addrspace(1)* %
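; shl by 15 followed by ashr by 15 on an i16 sign-extends bit 0, i.e. it
; implements sext_in_reg from i1.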
; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 15
; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
; GFX89: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 15
define amdgpu_kernel void @s_sext_in_reg_i1_i16(i16 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
%ld = load i32, i32 addrspace(2)* %ptr
define amdgpu_kernel void @s_sext_in_reg_i1_i16(i16 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 {
%ld = load i32, i32 addrspace(4)* %ptr
%in = trunc i32 %ld to i16
%shl = shl i16 %in, 15
%sext = ashr i16 %shl, 15
@ -547,8 +547,8 @@ define amdgpu_kernel void @s_sext_in_reg_i1_i16(i16 addrspace(1)* %out, i32 addr
; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14
; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
; GFX89: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14
define amdgpu_kernel void @s_sext_in_reg_i2_i16(i16 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
%ld = load i32, i32 addrspace(2)* %ptr
define amdgpu_kernel void @s_sext_in_reg_i2_i16(i16 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 {
%ld = load i32, i32 addrspace(4)* %ptr
%in = trunc i32 %ld to i16
%shl = shl i16 %in, 14
%sext = ashr i16 %shl, 14

View File

@ -4,10 +4,10 @@
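; The result of the scalar buffer load feeds a use that needs a vector
; register, so a v_mov copy from the SGPR into a VGPR is expected.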
; CHECK-LABEL: {{^}}phi1:
; CHECK: s_buffer_load_dword [[DST:s[0-9]]], {{s\[[0-9]+:[0-9]+\]}}, 0x0
; CHECK: v_mov_b32_e32 v{{[0-9]}}, [[DST]]
define amdgpu_ps void @phi1(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
define amdgpu_ps void @phi1(<4 x i32> addrspace(4)* inreg %arg, <4 x i32> addrspace(4)* inreg %arg1, <8 x i32> addrspace(4)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
%tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
%tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 0)
%tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16)
%tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 32)
@ -28,10 +28,10 @@ ENDIF: ; preds = %ELSE, %main_body
; Make sure this program doesn't crash
; CHECK-LABEL: {{^}}phi2:
define amdgpu_ps void @phi2(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #1 {
define amdgpu_ps void @phi2(<4 x i32> addrspace(4)* inreg %arg, <4 x i32> addrspace(4)* inreg %arg1, <8 x i32> addrspace(4)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #1 {
main_body:
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
%tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
%tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16)
%tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 32)
%tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 36)
@ -47,10 +47,10 @@ main_body:
%tmp33 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 84)
%tmp34 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 88)
%tmp35 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 92)
%tmp36 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %arg2, i32 0
%tmp37 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp36, !tbaa !0
%tmp38 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg1, i32 0
%tmp39 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp38, !tbaa !0
%tmp36 = getelementptr <8 x i32>, <8 x i32> addrspace(4)* %arg2, i32 0
%tmp37 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp36, !tbaa !0
%tmp38 = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg1, i32 0
%tmp39 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp38, !tbaa !0
%i.i = extractelement <2 x i32> %arg5, i32 0
%j.i = extractelement <2 x i32> %arg5, i32 1
%i.f.i = bitcast i32 %i.i to float
@ -173,10 +173,10 @@ ENDIF24: ; preds = %IF25, %ENDIF
; We just want to make sure the program doesn't crash
; CHECK-LABEL: {{^}}loop:
define amdgpu_ps void @loop(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
define amdgpu_ps void @loop(<4 x i32> addrspace(4)* inreg %arg, <4 x i32> addrspace(4)* inreg %arg1, <8 x i32> addrspace(4)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
%tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
%tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 0)
%tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 4)
%tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 8)
@ -226,15 +226,15 @@ ENDIF: ; preds = %LOOP
; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[SAMPLE_LO]]:[[SAMPLE_HI]]{{\]}}
; CHECK: exp
; CHECK: s_endpgm
define amdgpu_ps void @sample_v3([17 x <4 x i32>] addrspace(2)* byval %arg, [32 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
define amdgpu_ps void @sample_v3([17 x <4 x i32>] addrspace(4)* byval %arg, [32 x <4 x i32>] addrspace(4)* byval %arg1, [16 x <8 x i32>] addrspace(4)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
entry:
%tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg, i64 0, i32 0
%tmp21 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
%tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg, i64 0, i32 0
%tmp21 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0
%tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 16)
%tmp23 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 0
%tmp24 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp23, !tbaa !0
%tmp25 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 0
%tmp26 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp25, !tbaa !0
%tmp23 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 0
%tmp24 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp23, !tbaa !0
%tmp25 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 0
%tmp26 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp25, !tbaa !0
%tmp27 = fcmp oeq float %tmp22, 0.000000e+00
%tmp26.bc = bitcast <4 x i32> %tmp26 to <4 x i32>
br i1 %tmp27, label %if, label %else
@ -290,7 +290,7 @@ endif: ; preds = %if1, %if0, %entry
; This test just checks that we don't crash or hit an assertion failure.
; CHECK-LABEL: {{^}}copy2:
; CHECK: s_endpgm
define amdgpu_ps void @copy2([17 x <4 x i32>] addrspace(2)* byval %arg, [32 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
define amdgpu_ps void @copy2([17 x <4 x i32>] addrspace(4)* byval %arg, [32 x <4 x i32>] addrspace(4)* byval %arg1, [16 x <8 x i32>] addrspace(4)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
entry:
br label %LOOP68
@ -326,15 +326,15 @@ ENDIF69: ; preds = %LOOP68
; [[END]]:
; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[ADD]]{{\]}}
; CHECK: s_endpgm
define amdgpu_ps void @sample_rsrc([6 x <4 x i32>] addrspace(2)* byval %arg, [17 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
define amdgpu_ps void @sample_rsrc([6 x <4 x i32>] addrspace(4)* byval %arg, [17 x <4 x i32>] addrspace(4)* byval %arg1, [16 x <4 x i32>] addrspace(4)* byval %arg2, [32 x <8 x i32>] addrspace(4)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
bb:
%tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg1, i32 0, i32 0
%tmp22 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !3
%tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg1, i32 0, i32 0
%tmp22 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !3
%tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp22, i32 16)
%tmp25 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %arg3, i32 0, i32 0
%tmp26 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp25, !tbaa !3
%tmp27 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %arg2, i32 0, i32 0
%tmp28 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp27, !tbaa !3
%tmp25 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(4)* %arg3, i32 0, i32 0
%tmp26 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp25, !tbaa !3
%tmp27 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(4)* %arg2, i32 0, i32 0
%tmp28 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp27, !tbaa !3
%i.i = extractelement <2 x i32> %arg7, i32 0
%j.i = extractelement <2 x i32> %arg7, i32 1
%i.f.i = bitcast i32 %i.i to float
@ -382,11 +382,11 @@ bb71: ; preds = %bb80, %bb38
; Check the resource descriptor is stored in an sgpr.
; CHECK-LABEL: {{^}}mimg_srsrc_sgpr:
; CHECK: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
define amdgpu_ps void @mimg_srsrc_sgpr([34 x <8 x i32>] addrspace(2)* byval %arg) #0 {
define amdgpu_ps void @mimg_srsrc_sgpr([34 x <8 x i32>] addrspace(4)* byval %arg) #0 {
bb:
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp7 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %arg, i32 0, i32 %tid
%tmp8 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp7, align 32, !tbaa !0
%tmp7 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(4)* %arg, i32 0, i32 %tid
%tmp8 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp7, align 32, !tbaa !0
%tmp = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float 7.500000e-01, float 2.500000e-01>, <8 x i32> %tmp8, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp10 = extractelement <4 x float> %tmp, i32 0
%tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float %tmp10)
@ -397,11 +397,11 @@ bb:
; Check the sampler is stored in an sgpr.
; CHECK-LABEL: {{^}}mimg_ssamp_sgpr:
; CHECK: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
define amdgpu_ps void @mimg_ssamp_sgpr([17 x <4 x i32>] addrspace(2)* byval %arg) #0 {
define amdgpu_ps void @mimg_ssamp_sgpr([17 x <4 x i32>] addrspace(4)* byval %arg) #0 {
bb:
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp7 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg, i32 0, i32 %tid
%tmp8 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp7, align 16, !tbaa !0
%tmp7 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg, i32 0, i32 %tid
%tmp8 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp7, align 16, !tbaa !0
%tmp = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float 7.500000e-01, float 2.500000e-01>, <8 x i32> undef, <4 x i32> %tmp8, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp10 = extractelement <4 x float> %tmp, i32 0
%tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp10, float undef)

View File

@ -6,15 +6,15 @@
; GCN-LABEL: {{^}}main:
; GCN: image_sample_b v{{\[[0-9]:[0-9]\]}}, v{{\[[0-9]:[0-9]\]}}, s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf
define amdgpu_ps void @main(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
define amdgpu_ps void @main(<4 x i32> addrspace(4)* inreg %arg, <4 x i32> addrspace(4)* inreg %arg1, <8 x i32> addrspace(4)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
%tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
%tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16)
%tmp22 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %arg2, i32 0
%tmp23 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp22, !tbaa !0
%tmp24 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg1, i32 0
%tmp25 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp24, !tbaa !0
%tmp22 = getelementptr <8 x i32>, <8 x i32> addrspace(4)* %arg2, i32 0
%tmp23 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp22, !tbaa !0
%tmp24 = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg1, i32 0
%tmp25 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp24, !tbaa !0
%i.i = extractelement <2 x i32> %arg5, i32 0
%j.i = extractelement <2 x i32> %arg5, i32 1
%i.f.i = bitcast i32 %i.i to float

View File

@ -16,12 +16,12 @@
; CHECK: s_waitcnt vmcnt(0)
; CHECK: exp
; CHECK: s_endpgm
define amdgpu_ps void @main([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
define amdgpu_ps void @main([6 x <16 x i8>] addrspace(4)* byval %arg, [17 x <16 x i8>] addrspace(4)* byval %arg1, [17 x <4 x i32>] addrspace(4)* byval %arg2, [34 x <8 x i32>] addrspace(4)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
main_body:
%tmp = bitcast [34 x <8 x i32>] addrspace(2)* %arg3 to <32 x i8> addrspace(2)*
%tmp22 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp, align 32, !tbaa !0
%tmp23 = bitcast [17 x <4 x i32>] addrspace(2)* %arg2 to <16 x i8> addrspace(2)*
%tmp24 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp23, align 16, !tbaa !0
%tmp = bitcast [34 x <8 x i32>] addrspace(4)* %arg3 to <32 x i8> addrspace(4)*
%tmp22 = load <32 x i8>, <32 x i8> addrspace(4)* %tmp, align 32, !tbaa !0
%tmp23 = bitcast [17 x <4 x i32>] addrspace(4)* %arg2 to <16 x i8> addrspace(4)*
%tmp24 = load <16 x i8>, <16 x i8> addrspace(4)* %tmp23, align 16, !tbaa !0
%i.i = extractelement <2 x i32> %arg11, i32 0
%j.i = extractelement <2 x i32> %arg11, i32 1
%i.f.i = bitcast i32 %i.i to float

View File

@ -24,10 +24,10 @@
; GCN: s_endpgm
; TOVGPR: ScratchSize: 0{{$}}
define amdgpu_ps void @main([17 x <4 x i32>] addrspace(2)* byval %arg, [32 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) {
define amdgpu_ps void @main([17 x <4 x i32>] addrspace(4)* byval %arg, [32 x <4 x i32>] addrspace(4)* byval %arg1, [16 x <8 x i32>] addrspace(4)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) {
main_body:
%tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg, i64 0, i32 0
%tmp21 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
%tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg, i64 0, i32 0
%tmp21 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0
%tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 96)
%tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 100)
%tmp24 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 104)
@ -66,39 +66,39 @@ main_body:
%tmp57 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 372)
%tmp58 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 376)
%tmp59 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 384)
%tmp60 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 0
%tmp61 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp60, !tbaa !0
%tmp62 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 0
%tmp63 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp62, !tbaa !0
%tmp60 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 0
%tmp61 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp60, !tbaa !0
%tmp62 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 0
%tmp63 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp62, !tbaa !0
%tmp63.bc = bitcast <4 x i32> %tmp63 to <4 x i32>
%tmp64 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 1
%tmp65 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp64, !tbaa !0
%tmp66 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 1
%tmp67 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp66, !tbaa !0
%tmp68 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 2
%tmp69 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp68, !tbaa !0
%tmp70 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 2
%tmp71 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp70, !tbaa !0
%tmp72 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 3
%tmp73 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp72, !tbaa !0
%tmp74 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 3
%tmp75 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp74, !tbaa !0
%tmp76 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 4
%tmp77 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp76, !tbaa !0
%tmp78 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 4
%tmp79 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp78, !tbaa !0
%tmp80 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 5
%tmp81 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp80, !tbaa !0
%tmp82 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 5
%tmp83 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp82, !tbaa !0
%tmp84 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 6
%tmp85 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp84, !tbaa !0
%tmp86 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 6
%tmp87 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp86, !tbaa !0
%tmp88 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 7
%tmp89 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp88, !tbaa !0
%tmp90 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 7
%tmp91 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp90, !tbaa !0
%tmp64 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 1
%tmp65 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp64, !tbaa !0
%tmp66 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 1
%tmp67 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp66, !tbaa !0
%tmp68 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 2
%tmp69 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp68, !tbaa !0
%tmp70 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 2
%tmp71 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp70, !tbaa !0
%tmp72 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 3
%tmp73 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp72, !tbaa !0
%tmp74 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 3
%tmp75 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp74, !tbaa !0
%tmp76 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 4
%tmp77 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp76, !tbaa !0
%tmp78 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 4
%tmp79 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp78, !tbaa !0
%tmp80 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 5
%tmp81 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp80, !tbaa !0
%tmp82 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 5
%tmp83 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp82, !tbaa !0
%tmp84 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 6
%tmp85 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp84, !tbaa !0
%tmp86 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 6
%tmp87 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp86, !tbaa !0
%tmp88 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 7
%tmp89 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp88, !tbaa !0
%tmp90 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 7
%tmp91 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp90, !tbaa !0
%i.i = extractelement <2 x i32> %arg6, i32 0
%j.i = extractelement <2 x i32> %arg6, i32 1
%i.f.i = bitcast i32 %i.i to float
@ -778,10 +778,10 @@ ENDIF66: ; preds = %LOOP65
; GCN-LABEL: {{^}}main1:
; GCN: s_endpgm
; TOVGPR: ScratchSize: 0{{$}}
define amdgpu_ps void @main1([17 x <4 x i32>] addrspace(2)* byval %arg, [32 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
define amdgpu_ps void @main1([17 x <4 x i32>] addrspace(4)* byval %arg, [32 x <4 x i32>] addrspace(4)* byval %arg1, [16 x <8 x i32>] addrspace(4)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
main_body:
%tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg, i64 0, i32 0
%tmp21 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
%tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg, i64 0, i32 0
%tmp21 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0
%tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 0)
%tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 4)
%tmp24 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 8)
@ -885,42 +885,42 @@ main_body:
%tmp122 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 716)
%tmp123 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 864)
%tmp124 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 868)
%tmp125 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 0
%tmp126 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp125, !tbaa !0
%tmp127 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 0
%tmp128 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp127, !tbaa !0
%tmp129 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 1
%tmp130 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp129, !tbaa !0
%tmp131 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 1
%tmp132 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp131, !tbaa !0
%tmp133 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 2
%tmp134 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp133, !tbaa !0
%tmp135 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 2
%tmp136 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp135, !tbaa !0
%tmp137 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 3
%tmp138 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp137, !tbaa !0
%tmp139 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 3
%tmp140 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp139, !tbaa !0
%tmp141 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 4
%tmp142 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp141, !tbaa !0
%tmp143 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 4
%tmp144 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp143, !tbaa !0
%tmp145 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 5
%tmp146 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp145, !tbaa !0
%tmp147 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 5
%tmp148 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp147, !tbaa !0
%tmp149 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 6
%tmp150 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp149, !tbaa !0
%tmp151 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 6
%tmp152 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp151, !tbaa !0
%tmp153 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 7
%tmp154 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp153, !tbaa !0
%tmp155 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 7
%tmp156 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp155, !tbaa !0
%tmp157 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 8
%tmp158 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp157, !tbaa !0
%tmp159 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 8
%tmp160 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp159, !tbaa !0
%tmp125 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 0
%tmp126 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp125, !tbaa !0
%tmp127 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 0
%tmp128 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp127, !tbaa !0
%tmp129 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 1
%tmp130 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp129, !tbaa !0
%tmp131 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 1
%tmp132 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp131, !tbaa !0
%tmp133 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 2
%tmp134 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp133, !tbaa !0
%tmp135 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 2
%tmp136 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp135, !tbaa !0
%tmp137 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 3
%tmp138 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp137, !tbaa !0
%tmp139 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 3
%tmp140 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp139, !tbaa !0
%tmp141 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 4
%tmp142 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp141, !tbaa !0
%tmp143 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 4
%tmp144 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp143, !tbaa !0
%tmp145 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 5
%tmp146 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp145, !tbaa !0
%tmp147 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 5
%tmp148 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp147, !tbaa !0
%tmp149 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 6
%tmp150 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp149, !tbaa !0
%tmp151 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 6
%tmp152 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp151, !tbaa !0
%tmp153 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 7
%tmp154 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp153, !tbaa !0
%tmp155 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 7
%tmp156 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp155, !tbaa !0
%tmp157 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 8
%tmp158 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp157, !tbaa !0
%tmp159 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 8
%tmp160 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp159, !tbaa !0
%tmp161 = fcmp ugt float %arg17, 0.000000e+00
%tmp162 = select i1 %tmp161, float 1.000000e+00, float 0.000000e+00
%i.i = extractelement <2 x i32> %arg6, i32 0

View File

@ -8,7 +8,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() #2
@stored_lds_ptr = addrspace(3) global i32 addrspace(3)* undef, align 4
@stored_constant_ptr = addrspace(3) global i32 addrspace(2)* undef, align 8
@stored_constant_ptr = addrspace(3) global i32 addrspace(4)* undef, align 8
@stored_global_ptr = addrspace(3) global i32 addrspace(1)* undef, align 8
; GCN-LABEL: {{^}}reorder_local_load_global_store_local_load:
@ -100,14 +100,14 @@ define amdgpu_kernel void @no_reorder_barrier_local_load_global_store_local_load
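; Nothing can store to the constant address space, so the two constant loads
; below cannot be clobbered by the intervening global store and are free to
; be reordered across it.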
; CI: buffer_store_dword
; GFX9: global_store_dword
define amdgpu_kernel void @reorder_constant_load_global_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
%ptr0 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8
%ptr0 = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(3)* @stored_constant_ptr, align 8
%ptr1 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 1
%ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 3
%ptr1 = getelementptr inbounds i32, i32 addrspace(4)* %ptr0, i64 1
%ptr2 = getelementptr inbounds i32, i32 addrspace(4)* %ptr0, i64 3
%tmp1 = load i32, i32 addrspace(2)* %ptr1, align 4
%tmp1 = load i32, i32 addrspace(4)* %ptr1, align 4
store i32 99, i32 addrspace(1)* %gptr, align 4
%tmp2 = load i32, i32 addrspace(2)* %ptr2, align 4
%tmp2 = load i32, i32 addrspace(4)* %ptr2, align 4
%add = add nsw i32 %tmp1, %tmp2
@ -129,14 +129,14 @@ define amdgpu_kernel void @reorder_constant_load_global_store_constant_load(i32
; CI: buffer_store_dword
; GFX9: global_store_dword
define amdgpu_kernel void @reorder_constant_load_local_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(3)* %lptr) #0 {
%ptr0 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8
%ptr0 = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(3)* @stored_constant_ptr, align 8
%ptr1 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 1
%ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 3
%ptr1 = getelementptr inbounds i32, i32 addrspace(4)* %ptr0, i64 1
%ptr2 = getelementptr inbounds i32, i32 addrspace(4)* %ptr0, i64 3
%tmp1 = load i32, i32 addrspace(2)* %ptr1, align 4
%tmp1 = load i32, i32 addrspace(4)* %ptr1, align 4
store i32 99, i32 addrspace(3)* %lptr, align 4
%tmp2 = load i32, i32 addrspace(2)* %ptr2, align 4
%tmp2 = load i32, i32 addrspace(4)* %ptr2, align 4
%add = add nsw i32 %tmp1, %tmp2
@ -151,13 +151,13 @@ define amdgpu_kernel void @reorder_constant_load_local_store_constant_load(i32 a
; GCN: ds_write_b32
; CI: buffer_store_dword
; GFX9: global_store_dword
define amdgpu_kernel void @reorder_smrd_load_local_store_smrd_load(i32 addrspace(1)* %out, i32 addrspace(3)* noalias %lptr, i32 addrspace(2)* %ptr0) #0 {
%ptr1 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 1
%ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 2
define amdgpu_kernel void @reorder_smrd_load_local_store_smrd_load(i32 addrspace(1)* %out, i32 addrspace(3)* noalias %lptr, i32 addrspace(4)* %ptr0) #0 {
%ptr1 = getelementptr inbounds i32, i32 addrspace(4)* %ptr0, i64 1
%ptr2 = getelementptr inbounds i32, i32 addrspace(4)* %ptr0, i64 2
%tmp1 = load i32, i32 addrspace(2)* %ptr1, align 4
%tmp1 = load i32, i32 addrspace(4)* %ptr1, align 4
store i32 99, i32 addrspace(3)* %lptr, align 4
%tmp2 = load i32, i32 addrspace(2)* %ptr2, align 4
%tmp2 = load i32, i32 addrspace(4)* %ptr2, align 4
%add = add nsw i32 %tmp1, %tmp2

View File

@ -12,10 +12,10 @@
; GCN: buffer_store_dword
; GCN: [[EXIT]]:
; GCN: s_endpgm
define amdgpu_kernel void @vccz_workaround(i32 addrspace(2)* %in, i32 addrspace(1)* %out, float %cond) {
define amdgpu_kernel void @vccz_workaround(i32 addrspace(4)* %in, i32 addrspace(1)* %out, float %cond) {
entry:
%cnd = fcmp oeq float 0.0, %cond
%sgpr = load volatile i32, i32 addrspace(2)* %in
%sgpr = load volatile i32, i32 addrspace(4)* %in
br i1 %cnd, label %if, label %endif
if:

View File

@ -7,10 +7,10 @@
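; SI/CI encode the SMRD immediate offset in dwords while VI/GFX9 encode it in
; bytes, so the same 4-byte offset appears as 0x1 on SICI and 0x4 on VIGFX9.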
; GCN-LABEL: {{^}}smrd0:
; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
; VIGFX9: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4
define amdgpu_kernel void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
define amdgpu_kernel void @smrd0(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 {
entry:
%tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 1
%tmp1 = load i32, i32 addrspace(2)* %tmp
%tmp = getelementptr i32, i32 addrspace(4)* %ptr, i64 1
%tmp1 = load i32, i32 addrspace(4)* %tmp
store i32 %tmp1, i32 addrspace(1)* %out
ret void
}
@ -19,10 +19,10 @@ entry:
; GCN-LABEL: {{^}}smrd1:
; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff,0x{{[0-9]+[137]}}
; VIGFX9: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
define amdgpu_kernel void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
define amdgpu_kernel void @smrd1(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 {
entry:
%tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 255
%tmp1 = load i32, i32 addrspace(2)* %tmp
%tmp = getelementptr i32, i32 addrspace(4)* %ptr, i64 255
%tmp1 = load i32, i32 addrspace(4)* %tmp
store i32 %tmp1, i32 addrspace(1)* %out
ret void
}
@ -34,10 +34,10 @@ entry:
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
; VIGFX9: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
; GCN: s_endpgm
define amdgpu_kernel void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
define amdgpu_kernel void @smrd2(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 {
entry:
%tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 256
%tmp1 = load i32, i32 addrspace(2)* %tmp
%tmp = getelementptr i32, i32 addrspace(4)* %ptr, i64 256
%tmp1 = load i32, i32 addrspace(4)* %tmp
store i32 %tmp1, i32 addrspace(1)* %out
ret void
}
@@ -49,10 +49,10 @@ entry:
; SI: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0xb ; encoding: [0x0b
; TODO: Add VI checks
; GCN: s_endpgm
define amdgpu_kernel void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
define amdgpu_kernel void @smrd3(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 {
entry:
%tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 4294967296
%tmp1 = load i32, i32 addrspace(2)* %tmp
%tmp = getelementptr i32, i32 addrspace(4)* %ptr, i64 4294967296
%tmp1 = load i32, i32 addrspace(4)* %tmp
store i32 %tmp1, i32 addrspace(1)* %out
ret void
}
@@ -63,10 +63,10 @@ entry:
; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
; VIGFX9: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
define amdgpu_kernel void @smrd4(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
define amdgpu_kernel void @smrd4(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 {
entry:
%tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 262143
%tmp1 = load i32, i32 addrspace(2)* %tmp
%tmp = getelementptr i32, i32 addrspace(4)* %ptr, i64 262143
%tmp1 = load i32, i32 addrspace(4)* %tmp
store i32 %tmp1, i32 addrspace(1)* %out
ret void
}
@@ -77,10 +77,10 @@ entry:
; SIVIGFX9: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
; GCN: s_endpgm
define amdgpu_kernel void @smrd5(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
define amdgpu_kernel void @smrd5(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 {
entry:
%tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 262144
%tmp1 = load i32, i32 addrspace(2)* %tmp
%tmp = getelementptr i32, i32 addrspace(4)* %ptr, i64 262144
%tmp1 = load i32, i32 addrspace(4)* %tmp
store i32 %tmp1, i32 addrspace(1)* %out
ret void
}
@@ -106,10 +106,10 @@ main_body:
; GCN-LABEL: {{^}}smrd_load_const0:
; SICI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
; VIGFX9: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x10
define amdgpu_ps void @smrd_load_const0(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
define amdgpu_ps void @smrd_load_const0(<4 x i32> addrspace(4)* inreg %arg, <4 x i32> addrspace(4)* inreg %arg1, <32 x i8> addrspace(4)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
%tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
%tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
ret void
@@ -120,10 +120,10 @@ main_body:
; GCN-LABEL: {{^}}smrd_load_const1:
; SICI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
; VIGFX9: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
define amdgpu_ps void @smrd_load_const1(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
define amdgpu_ps void @smrd_load_const1(<4 x i32> addrspace(4)* inreg %arg, <4 x i32> addrspace(4)* inreg %arg1, <32 x i8> addrspace(4)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
%tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
%tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 1020)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
ret void
@@ -137,10 +137,10 @@ main_body:
; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
; VIGFX9: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
define amdgpu_ps void @smrd_load_const2(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
define amdgpu_ps void @smrd_load_const2(<4 x i32> addrspace(4)* inreg %arg, <4 x i32> addrspace(4)* inreg %arg1, <32 x i8> addrspace(4)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
%tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
%tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 1024)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
ret void
@@ -152,10 +152,10 @@ main_body:
; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
; VIGFX9: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
define amdgpu_ps void @smrd_load_const3(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
define amdgpu_ps void @smrd_load_const3(<4 x i32> addrspace(4)* inreg %arg, <4 x i32> addrspace(4)* inreg %arg1, <32 x i8> addrspace(4)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
%tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
%tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 1048572)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
ret void
@@ -167,10 +167,10 @@ main_body:
; SIVIGFX9: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
; GCN: s_endpgm
define amdgpu_ps void @smrd_load_const4(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
define amdgpu_ps void @smrd_load_const4(<4 x i32> addrspace(4)* inreg %arg, <4 x i32> addrspace(4)* inreg %arg1, <32 x i8> addrspace(4)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
%tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
%tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 1048576)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
ret void
@@ -257,9 +257,9 @@ main_body:
; GCN-LABEL: {{^}}smrd_sgpr_descriptor_promoted
; GCN: v_readfirstlane
define amdgpu_cs void @smrd_sgpr_descriptor_promoted([0 x i8] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), i32) #0 {
define amdgpu_cs void @smrd_sgpr_descriptor_promoted([0 x i8] addrspace(4)* inreg noalias dereferenceable(18446744073709551615), i32) #0 {
main_body:
%descptr = bitcast [0 x i8] addrspace(2)* %0 to <4 x i32> addrspace(2)*, !amdgpu.uniform !0
%descptr = bitcast [0 x i8] addrspace(4)* %0 to <4 x i32> addrspace(4)*, !amdgpu.uniform !0
br label %.outer_loop_header
ret_block: ; preds = %.outer, %.label22, %main_body
@@ -275,7 +275,7 @@ ret_block: ; preds = %.outer, %.label22, %
br i1 %inner_br1, label %.inner_loop_body, label %ret_block
.inner_loop_body:
%descriptor = load <4 x i32>, <4 x i32> addrspace(2)* %descptr, align 16, !invariant.load !0
%descriptor = load <4 x i32>, <4 x i32> addrspace(4)* %descptr, align 16, !invariant.load !0
%load1result = call float @llvm.SI.load.const.v4i32(<4 x i32> %descriptor, i32 0)
%inner_br2 = icmp uge i32 %1, 10
br i1 %inner_br2, label %.inner_loop_header, label %.outer_loop_body

@@ -87,7 +87,7 @@ endif:
; GCN-NOT: v_readlane_b32 m0
; GCN-NOT: s_buffer_store_dword m0
; GCN-NOT: s_buffer_load_dword m0
define amdgpu_ps void @spill_kill_m0_lds(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %m0) #0 {
define amdgpu_ps void @spill_kill_m0_lds(<16 x i8> addrspace(4)* inreg %arg, <16 x i8> addrspace(4)* inreg %arg1, <32 x i8> addrspace(4)* inreg %arg2, i32 inreg %m0) #0 {
main_body:
%tmp = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %m0)
%cmp = fcmp ueq float 0.000000e+00, %tmp
@@ -191,7 +191,7 @@ endif:
; TOSMEM: s_endpgm
define amdgpu_kernel void @restore_m0_lds(i32 %arg) {
%m0 = call i32 asm sideeffect "s_mov_b32 m0, 0", "={M0}"() #0
%sval = load volatile i64, i64 addrspace(2)* undef
%sval = load volatile i64, i64 addrspace(4)* undef
%cmp = icmp eq i32 %arg, 0
br i1 %cmp, label %ret, label %bb

@@ -6,7 +6,7 @@
; GCN-LABEL: {{^}}split_smrd_add_worklist:
; GCN: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
define amdgpu_ps void @split_smrd_add_worklist([34 x <8 x i32>] addrspace(2)* byval %arg) #0 {
define amdgpu_ps void @split_smrd_add_worklist([34 x <8 x i32>] addrspace(4)* byval %arg) #0 {
bb:
%tmp = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 96)
%tmp1 = bitcast float %tmp to i32
@@ -19,8 +19,8 @@ bb3: ; preds = %bb
%tmp4 = bitcast float %tmp to i32
%tmp5 = add i32 %tmp4, 4
%tmp6 = sext i32 %tmp5 to i64
%tmp7 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %arg, i64 0, i64 %tmp6
%tmp8 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp7, align 32, !tbaa !0
%tmp7 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(4)* %arg, i64 0, i64 %tmp6
%tmp8 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp7, align 32, !tbaa !0
%tmp9 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float bitcast (i32 1061158912 to float), float bitcast (i32 1048576000 to float)>, <8 x i32> %tmp8, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp10 = extractelement <4 x float> %tmp9, i32 0
%tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp10, float undef)

@@ -394,11 +394,11 @@ entry:
; SIVI: buffer_store_dwordx2
; GFX9: global_store_dwordx2
define amdgpu_kernel void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 {
define amdgpu_kernel void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* nocapture %mem) #0 {
entry:
%0 = load i32, i32 addrspace(2)* %mem, align 4
%arrayidx1.i = getelementptr inbounds i32, i32 addrspace(2)* %mem, i64 1
%1 = load i32, i32 addrspace(2)* %arrayidx1.i, align 4
%0 = load i32, i32 addrspace(4)* %mem, align 4
%arrayidx1.i = getelementptr inbounds i32, i32 addrspace(4)* %mem, i64 1
%1 = load i32, i32 addrspace(4)* %arrayidx1.i, align 4
store i32 %0, i32 addrspace(1)* %out, align 4
%arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
store i32 %1, i32 addrspace(1)* %arrayidx1, align 4

@@ -689,11 +689,11 @@ entry:
; XSI: buffer_store_dwordx2
; SI: buffer_store_dword
; SI: buffer_store_dword
define amdgpu_kernel void @vecload2(i32 addrspace(5)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 {
define amdgpu_kernel void @vecload2(i32 addrspace(5)* nocapture %out, i32 addrspace(4)* nocapture %mem) #0 {
entry:
%0 = load i32, i32 addrspace(2)* %mem, align 4
%arrayidx1.i = getelementptr inbounds i32, i32 addrspace(2)* %mem, i64 1
%1 = load i32, i32 addrspace(2)* %arrayidx1.i, align 4
%0 = load i32, i32 addrspace(4)* %mem, align 4
%arrayidx1.i = getelementptr inbounds i32, i32 addrspace(4)* %mem, i64 1
%1 = load i32, i32 addrspace(4)* %arrayidx1.i, align 4
store i32 %0, i32 addrspace(5)* %out, align 4
%arrayidx1 = getelementptr inbounds i32, i32 addrspace(5)* %out, i64 1
store i32 %1, i32 addrspace(5)* %arrayidx1, align 4

@@ -27,9 +27,9 @@ define amdgpu_kernel void @v_test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i
; VI: s_sub_i32
; VI: s_sub_i32
define amdgpu_kernel void @s_test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %in0, <2 x i16> addrspace(2)* %in1) #1 {
%a = load <2 x i16>, <2 x i16> addrspace(2)* %in0
%b = load <2 x i16>, <2 x i16> addrspace(2)* %in1
define amdgpu_kernel void @s_test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %in0, <2 x i16> addrspace(4)* %in1) #1 {
%a = load <2 x i16>, <2 x i16> addrspace(4)* %in0
%b = load <2 x i16>, <2 x i16> addrspace(4)* %in1
%add = sub <2 x i16> %a, %b
store <2 x i16> %add, <2 x i16> addrspace(1)* %out
ret void
@@ -38,8 +38,8 @@ define amdgpu_kernel void @s_test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i
; GCN-LABEL: {{^}}s_test_sub_self_v2i16:
; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]]
; GCN: buffer_store_dword [[ZERO]]
define amdgpu_kernel void @s_test_sub_self_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %in0) #1 {
%a = load <2 x i16>, <2 x i16> addrspace(2)* %in0
define amdgpu_kernel void @s_test_sub_self_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %in0) #1 {
%a = load <2 x i16>, <2 x i16> addrspace(4)* %in0
%add = sub <2 x i16> %a, %a
store <2 x i16> %add, <2 x i16> addrspace(1)* %out
ret void

@@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
declare i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #1
declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #1
declare i32 @llvm.amdgcn.workitem.id.x() #1
@@ -15,10 +15,10 @@ declare void @llvm.amdgcn.s.dcache.wb() #0
; CHECK: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, [[OFFSETREG]]
; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
define amdgpu_kernel void @target_none() #0 {
%kernargs = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
%kernargs.gep = getelementptr inbounds i8, i8 addrspace(2)* %kernargs, i64 1024
%kernargs.gep.cast = bitcast i8 addrspace(2)* %kernargs.gep to i32 addrspace(1)* addrspace(2)*
%ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %kernargs.gep.cast
%kernargs = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
%kernargs.gep = getelementptr inbounds i8, i8 addrspace(4)* %kernargs, i64 1024
%kernargs.gep.cast = bitcast i8 addrspace(4)* %kernargs.gep to i32 addrspace(1)* addrspace(4)*
%ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %kernargs.gep.cast
%id = call i32 @llvm.amdgcn.workitem.id.x()
%id.ext = sext i32 %id to i64
%gep = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i64 %id.ext
@@ -31,10 +31,10 @@ define amdgpu_kernel void @target_none() #0 {
; CHECK: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, [[OFFSETREG]]
; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
define amdgpu_kernel void @target_tahiti() #1 {
%kernargs = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
%kernargs.gep = getelementptr inbounds i8, i8 addrspace(2)* %kernargs, i64 1024
%kernargs.gep.cast = bitcast i8 addrspace(2)* %kernargs.gep to i32 addrspace(1)* addrspace(2)*
%ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %kernargs.gep.cast
%kernargs = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
%kernargs.gep = getelementptr inbounds i8, i8 addrspace(4)* %kernargs, i64 1024
%kernargs.gep.cast = bitcast i8 addrspace(4)* %kernargs.gep to i32 addrspace(1)* addrspace(4)*
%ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %kernargs.gep.cast
%id = call i32 @llvm.amdgcn.workitem.id.x()
%id.ext = sext i32 %id to i64
%gep = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i64 %id.ext
@@ -47,10 +47,10 @@ define amdgpu_kernel void @target_tahiti() #1 {
; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
; CHECK: s_dcache_inv_vol
define amdgpu_kernel void @target_bonaire() #3 {
%kernargs = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
%kernargs.gep = getelementptr inbounds i8, i8 addrspace(2)* %kernargs, i64 1024
%kernargs.gep.cast = bitcast i8 addrspace(2)* %kernargs.gep to i32 addrspace(1)* addrspace(2)*
%ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %kernargs.gep.cast
%kernargs = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
%kernargs.gep = getelementptr inbounds i8, i8 addrspace(4)* %kernargs, i64 1024
%kernargs.gep.cast = bitcast i8 addrspace(4)* %kernargs.gep to i32 addrspace(1)* addrspace(4)*
%ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %kernargs.gep.cast
%id = call i32 @llvm.amdgcn.workitem.id.x()
%id.ext = sext i32 %id to i64
%gep = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i64 %id.ext
@@ -64,10 +64,10 @@ define amdgpu_kernel void @target_bonaire() #3 {
; CHECK: flat_store_dword
; CHECK: s_dcache_wb{{$}}
define amdgpu_kernel void @target_fiji() #4 {
%kernargs = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
%kernargs.gep = getelementptr inbounds i8, i8 addrspace(2)* %kernargs, i64 1024
%kernargs.gep.cast = bitcast i8 addrspace(2)* %kernargs.gep to i32 addrspace(1)* addrspace(2)*
%ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %kernargs.gep.cast
%kernargs = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
%kernargs.gep = getelementptr inbounds i8, i8 addrspace(4)* %kernargs, i64 1024
%kernargs.gep.cast = bitcast i8 addrspace(4)* %kernargs.gep to i32 addrspace(1)* addrspace(4)*
%ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %kernargs.gep.cast
%id = call i32 @llvm.amdgcn.workitem.id.x()
%id.ext = sext i32 %id to i64
%gep = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i64 %id.ext

@@ -418,8 +418,8 @@ define amdgpu_kernel void @local_store_i64_align_4_with_split_offset(i64 addrspa
; UNALIGNED: s_load_dword
; SI: buffer_store_dword
define amdgpu_kernel void @constant_unaligned_load_i32(i32 addrspace(2)* %p, i32 addrspace(1)* %r) #0 {
%v = load i32, i32 addrspace(2)* %p, align 1
define amdgpu_kernel void @constant_unaligned_load_i32(i32 addrspace(4)* %p, i32 addrspace(1)* %r) #0 {
%v = load i32, i32 addrspace(4)* %p, align 1
store i32 %v, i32 addrspace(1)* %r, align 4
ret void
}
@@ -430,8 +430,8 @@ define amdgpu_kernel void @constant_unaligned_load_i32(i32 addrspace(2)* %p, i32
; UNALIGNED: s_load_dword
; UNALIGNED: buffer_store_dword
define amdgpu_kernel void @constant_align2_load_i32(i32 addrspace(2)* %p, i32 addrspace(1)* %r) #0 {
%v = load i32, i32 addrspace(2)* %p, align 2
define amdgpu_kernel void @constant_align2_load_i32(i32 addrspace(4)* %p, i32 addrspace(1)* %r) #0 {
%v = load i32, i32 addrspace(4)* %p, align 2
store i32 %v, i32 addrspace(1)* %r, align 4
ret void
}
@@ -444,8 +444,8 @@ define amdgpu_kernel void @constant_align2_load_i32(i32 addrspace(2)* %p, i32 ad
; UNALIGNED: s_load_dwordx2
; UNALIGNED: buffer_store_dwordx2
define amdgpu_kernel void @constant_align2_load_i64(i64 addrspace(2)* %p, i64 addrspace(1)* %r) #0 {
%v = load i64, i64 addrspace(2)* %p, align 2
define amdgpu_kernel void @constant_align2_load_i64(i64 addrspace(4)* %p, i64 addrspace(1)* %r) #0 {
%v = load i64, i64 addrspace(4)* %p, align 2
store i64 %v, i64 addrspace(1)* %r, align 4
ret void
}
@@ -453,8 +453,8 @@ define amdgpu_kernel void @constant_align2_load_i64(i64 addrspace(2)* %p, i64 ad
; SI-LABEL: {{^}}constant_align4_load_i64:
; SI: s_load_dwordx2
; SI: buffer_store_dwordx2
define amdgpu_kernel void @constant_align4_load_i64(i64 addrspace(2)* %p, i64 addrspace(1)* %r) #0 {
%v = load i64, i64 addrspace(2)* %p, align 4
define amdgpu_kernel void @constant_align4_load_i64(i64 addrspace(4)* %p, i64 addrspace(1)* %r) #0 {
%v = load i64, i64 addrspace(4)* %p, align 4
store i64 %v, i64 addrspace(1)* %r, align 4
ret void
}
@@ -462,8 +462,8 @@ define amdgpu_kernel void @constant_align4_load_i64(i64 addrspace(2)* %p, i64 ad
; SI-LABEL: {{^}}constant_align4_load_v4i32:
; SI: s_load_dwordx4
; SI: buffer_store_dwordx4
define amdgpu_kernel void @constant_align4_load_v4i32(<4 x i32> addrspace(2)* %p, <4 x i32> addrspace(1)* %r) #0 {
%v = load <4 x i32>, <4 x i32> addrspace(2)* %p, align 4
define amdgpu_kernel void @constant_align4_load_v4i32(<4 x i32> addrspace(4)* %p, <4 x i32> addrspace(1)* %r) #0 {
%v = load <4 x i32>, <4 x i32> addrspace(4)* %p, align 4
store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 4
ret void
}
@@ -482,8 +482,8 @@ define amdgpu_kernel void @constant_align4_load_v4i32(<4 x i32> addrspace(2)* %p
; UNALIGNED: buffer_load_dwordx2
; SI: buffer_store_dwordx2
define amdgpu_kernel void @constant_unaligned_load_v2i32(<2 x i32> addrspace(2)* %p, <2 x i32> addrspace(1)* %r) #0 {
%v = load <2 x i32>, <2 x i32> addrspace(2)* %p, align 1
define amdgpu_kernel void @constant_unaligned_load_v2i32(<2 x i32> addrspace(4)* %p, <2 x i32> addrspace(1)* %r) #0 {
%v = load <2 x i32>, <2 x i32> addrspace(4)* %p, align 1
store <2 x i32> %v, <2 x i32> addrspace(1)* %r, align 4
ret void
}
@@ -512,8 +512,8 @@ define amdgpu_kernel void @constant_unaligned_load_v2i32(<2 x i32> addrspace(2)*
; UNALIGNED: buffer_load_dwordx4
; SI: buffer_store_dwordx4
define amdgpu_kernel void @constant_unaligned_load_v4i32(<4 x i32> addrspace(2)* %p, <4 x i32> addrspace(1)* %r) #0 {
%v = load <4 x i32>, <4 x i32> addrspace(2)* %p, align 1
define amdgpu_kernel void @constant_unaligned_load_v4i32(<4 x i32> addrspace(4)* %p, <4 x i32> addrspace(1)* %r) #0 {
%v = load <4 x i32>, <4 x i32> addrspace(4)* %p, align 1
store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 4
ret void
}
@@ -521,8 +521,8 @@ define amdgpu_kernel void @constant_unaligned_load_v4i32(<4 x i32> addrspace(2)*
; SI-LABEL: {{^}}constant_align4_load_i8:
; SI: s_load_dword
; SI: buffer_store_byte
define amdgpu_kernel void @constant_align4_load_i8(i8 addrspace(2)* %p, i8 addrspace(1)* %r) #0 {
%v = load i8, i8 addrspace(2)* %p, align 4
define amdgpu_kernel void @constant_align4_load_i8(i8 addrspace(4)* %p, i8 addrspace(1)* %r) #0 {
%v = load i8, i8 addrspace(4)* %p, align 4
store i8 %v, i8 addrspace(1)* %r, align 4
ret void
}
@@ -530,8 +530,8 @@ define amdgpu_kernel void @constant_align4_load_i8(i8 addrspace(2)* %p, i8 addrs
; SI-LABEL: {{^}}constant_align2_load_i8:
; SI: buffer_load_ubyte
; SI: buffer_store_byte
define amdgpu_kernel void @constant_align2_load_i8(i8 addrspace(2)* %p, i8 addrspace(1)* %r) #0 {
%v = load i8, i8 addrspace(2)* %p, align 2
define amdgpu_kernel void @constant_align2_load_i8(i8 addrspace(4)* %p, i8 addrspace(1)* %r) #0 {
%v = load i8, i8 addrspace(4)* %p, align 2
store i8 %v, i8 addrspace(1)* %r, align 2
ret void
}
@@ -541,10 +541,10 @@ define amdgpu_kernel void @constant_align2_load_i8(i8 addrspace(2)* %p, i8 addrs
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[LO]]
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HI]]
; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
define amdgpu_kernel void @constant_align4_merge_load_2_i32(i32 addrspace(2)* %p, i32 addrspace(1)* %r) #0 {
%gep0 = getelementptr i32, i32 addrspace(2)* %p, i64 1
%v0 = load i32, i32 addrspace(2)* %p, align 4
%v1 = load i32, i32 addrspace(2)* %gep0, align 4
define amdgpu_kernel void @constant_align4_merge_load_2_i32(i32 addrspace(4)* %p, i32 addrspace(1)* %r) #0 {
%gep0 = getelementptr i32, i32 addrspace(4)* %p, i64 1
%v0 = load i32, i32 addrspace(4)* %p, align 4
%v1 = load i32, i32 addrspace(4)* %gep0, align 4
%gep1 = getelementptr i32, i32 addrspace(1)* %r, i64 1
store i32 %v0, i32 addrspace(1)* %r, align 4

@@ -35,7 +35,7 @@ bb2: ; preds = %bb
br label %bb3
bb3: ; preds = %bb3, %bb2
%val = load volatile i32, i32 addrspace(2)* undef
%val = load volatile i32, i32 addrspace(4)* undef
%tmp4 = icmp eq i32 %val, %arg1
br i1 %tmp4, label %bb5, label %bb3

@@ -36,11 +36,11 @@ define amdgpu_kernel void @vtx_fetch32_id3(i32 addrspace(1)* %out, i32 addrspace
; EG: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0, #2 ; encoding: [0x40,0x02,0x0[[GPR]],0x10,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x08,0x00
; CM: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0, #2 ; encoding: [0x40,0x02,0x0[[GPR]],0x00,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x00,0x00
@t = internal addrspace(2) constant [4 x i32] [i32 0, i32 1, i32 2, i32 3]
@t = internal addrspace(4) constant [4 x i32] [i32 0, i32 1, i32 2, i32 3]
define amdgpu_kernel void @vtx_fetch32_id2(i32 addrspace(1)* %out, i32 %in) {
%a = getelementptr inbounds [4 x i32], [4 x i32] addrspace(2)* @t, i32 0, i32 %in
%v = load i32, i32 addrspace(2)* %a
%a = getelementptr inbounds [4 x i32], [4 x i32] addrspace(4)* @t, i32 0, i32 %in
%v = load i32, i32 addrspace(4)* %a
store i32 %v, i32 addrspace(1)* %out
ret void
}

@@ -27,15 +27,15 @@
; GCN: NumVgprs: 256
; GCN: ScratchSize: 1536
define amdgpu_vs void @main([9 x <4 x i32>] addrspace(2)* byval %arg, [17 x <4 x i32>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, [16 x <4 x i32>] addrspace(2)* byval %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {
define amdgpu_vs void @main([9 x <4 x i32>] addrspace(4)* byval %arg, [17 x <4 x i32>] addrspace(4)* byval %arg1, [17 x <4 x i32>] addrspace(4)* byval %arg2, [34 x <8 x i32>] addrspace(4)* byval %arg3, [16 x <4 x i32>] addrspace(4)* byval %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {
bb:
%tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg1, i64 0, i64 0
%tmp11 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, align 16, !tbaa !0
%tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg1, i64 0, i64 0
%tmp11 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, align 16, !tbaa !0
%tmp12 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp11, i32 0)
%tmp13 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp11, i32 16)
%tmp14 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp11, i32 32)
%tmp15 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %arg4, i64 0, i64 0
%tmp16 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp15, align 16, !tbaa !0
%tmp15 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(4)* %arg4, i64 0, i64 0
%tmp16 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp15, align 16, !tbaa !0
%tmp17 = add i32 %arg5, %arg7
%tmp16.cast = bitcast <4 x i32> %tmp16 to <4 x i32>
%tmp18 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp16.cast, i32 %tmp17, i32 0, i1 false, i1 false)

@@ -11,19 +11,19 @@
; DEFAULT: exp
; DEFAULT: s_waitcnt lgkmcnt(0)
; DEFAULT: s_endpgm
define amdgpu_vs void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, <16 x i8> addrspace(2)* inreg %arg3, <16 x i8> addrspace(2)* inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, float addrspace(2)* inreg %constptr) #0 {
define amdgpu_vs void @main(<16 x i8> addrspace(4)* inreg %arg, <16 x i8> addrspace(4)* inreg %arg1, <32 x i8> addrspace(4)* inreg %arg2, <16 x i8> addrspace(4)* inreg %arg3, <16 x i8> addrspace(4)* inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, float addrspace(4)* inreg %constptr) #0 {
main_body:
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg3, i32 0
%tmp10 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(4)* %arg3, i32 0
%tmp10 = load <16 x i8>, <16 x i8> addrspace(4)* %tmp, !tbaa !0
%tmp10.cast = bitcast <16 x i8> %tmp10 to <4 x i32>
%tmp11 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp10.cast, i32 %arg6, i32 0, i1 false, i1 false)
%tmp12 = extractelement <4 x float> %tmp11, i32 0
%tmp13 = extractelement <4 x float> %tmp11, i32 1
call void @llvm.amdgcn.s.barrier() #1
%tmp14 = extractelement <4 x float> %tmp11, i32 2
%tmp15 = load float, float addrspace(2)* %constptr, align 4
%tmp16 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg3, i32 1
%tmp17 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp16, !tbaa !0
%tmp15 = load float, float addrspace(4)* %constptr, align 4
%tmp16 = getelementptr <16 x i8>, <16 x i8> addrspace(4)* %arg3, i32 1
%tmp17 = load <16 x i8>, <16 x i8> addrspace(4)* %tmp16, !tbaa !0
%tmp17.cast = bitcast <16 x i8> %tmp17 to <4 x i32>
%tmp18 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp17.cast, i32 %arg6, i32 0, i1 false, i1 false)
%tmp19 = extractelement <4 x float> %tmp18, i32 0
@@ -46,10 +46,10 @@ main_body:
; ILPMAX: exp pos0
; ILPMAX-NEXT: exp param0
; ILPMAX: s_endpgm
define amdgpu_vs void @main2([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, [16 x <16 x i8>] addrspace(2)* byval %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {
define amdgpu_vs void @main2([6 x <16 x i8>] addrspace(4)* byval %arg, [17 x <16 x i8>] addrspace(4)* byval %arg1, [17 x <4 x i32>] addrspace(4)* byval %arg2, [34 x <8 x i32>] addrspace(4)* byval %arg3, [16 x <16 x i8>] addrspace(4)* byval %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {
main_body:
%tmp = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %arg4, i64 0, i64 0
%tmp11 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, align 16, !tbaa !0
%tmp = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(4)* %arg4, i64 0, i64 0
%tmp11 = load <16 x i8>, <16 x i8> addrspace(4)* %tmp, align 16, !tbaa !0
%tmp12 = add i32 %arg5, %arg7
%tmp11.cast = bitcast <16 x i8> %tmp11 to <4 x i32>
%tmp13 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp11.cast, i32 %tmp12, i32 0, i1 false, i1 false)
@@ -57,8 +57,8 @@ main_body:
%tmp15 = extractelement <4 x float> %tmp13, i32 1
%tmp16 = extractelement <4 x float> %tmp13, i32 2
%tmp17 = extractelement <4 x float> %tmp13, i32 3
%tmp18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %arg4, i64 0, i64 1
%tmp19 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp18, align 16, !tbaa !0
%tmp18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(4)* %arg4, i64 0, i64 1
%tmp19 = load <16 x i8>, <16 x i8> addrspace(4)* %tmp18, align 16, !tbaa !0
%tmp20 = add i32 %arg5, %arg7
%tmp19.cast = bitcast <16 x i8> %tmp19 to <4 x i32>
%tmp21 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp19.cast, i32 %tmp20, i32 0, i1 false, i1 false)

@@ -22,19 +22,19 @@ bb:
br label %bb18
bb1: ; preds = %bb18
%tmp = tail call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
%tmp = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
%tmp2 = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp3 = tail call i32 @llvm.amdgcn.workgroup.id.x()
%tmp4 = getelementptr inbounds i8, i8 addrspace(2)* %tmp, i64 4
%tmp5 = bitcast i8 addrspace(2)* %tmp4 to i16 addrspace(2)*
%tmp6 = load i16, i16 addrspace(2)* %tmp5, align 4
%tmp4 = getelementptr inbounds i8, i8 addrspace(4)* %tmp, i64 4
%tmp5 = bitcast i8 addrspace(4)* %tmp4 to i16 addrspace(4)*
%tmp6 = load i16, i16 addrspace(4)* %tmp5, align 4
%tmp7 = zext i16 %tmp6 to i32
%tmp8 = mul i32 %tmp3, %tmp7
%tmp9 = add i32 %tmp8, %tmp2
%tmp10 = tail call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
%tmp10 = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%tmp11 = zext i32 %tmp9 to i64
%tmp12 = bitcast i8 addrspace(2)* %tmp10 to i64 addrspace(2)*
%tmp13 = load i64, i64 addrspace(2)* %tmp12, align 8
%tmp12 = bitcast i8 addrspace(4)* %tmp10 to i64 addrspace(4)*
%tmp13 = load i64, i64 addrspace(4)* %tmp12, align 8
%tmp14 = add i64 %tmp13, %tmp11
%tmp15 = zext i1 %tmp99 to i32
%tmp16 = and i64 %tmp14, 4294967295
@@ -131,7 +131,7 @@ bb18: ; preds = %bb18, %bb
}
; Function Attrs: nounwind readnone speculatable
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #1
declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #1
; Function Attrs: nounwind readnone speculatable
declare i32 @llvm.amdgcn.workitem.id.x() #1
@@ -140,7 +140,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
declare i32 @llvm.amdgcn.workgroup.id.x() #1
; Function Attrs: nounwind readnone speculatable
declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() #1
declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #1
attributes #0 = { "target-cpu"="fiji" "target-features"="-flat-for-global" }
attributes #1 = { nounwind readnone speculatable }

@@ -1,12 +1,12 @@
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare < %s | FileCheck -check-prefix=OPT %s
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
; OPT-LABEL: @constant_load_i1
; OPT: load i1
; OPT-NEXT: store i1
define amdgpu_kernel void @constant_load_i1(i1 addrspace(1)* %out, i1 addrspace(2)* %in) #0 {
%val = load i1, i1 addrspace(2)* %in
define amdgpu_kernel void @constant_load_i1(i1 addrspace(1)* %out, i1 addrspace(4)* %in) #0 {
%val = load i1, i1 addrspace(4)* %in
store i1 %val, i1 addrspace(1)* %out
ret void
}
@@ -14,8 +14,8 @@ define amdgpu_kernel void @constant_load_i1(i1 addrspace(1)* %out, i1 addrspace(
; OPT-LABEL: @constant_load_i1_align2
; OPT: load i1
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_i1_align2(i1 addrspace(1)* %out, i1 addrspace(2)* %in) #0 {
%val = load i1, i1 addrspace(2)* %in, align 2
define amdgpu_kernel void @constant_load_i1_align2(i1 addrspace(1)* %out, i1 addrspace(4)* %in) #0 {
%val = load i1, i1 addrspace(4)* %in, align 2
store i1 %val, i1 addrspace(1)* %out, align 2
ret void
}
@@ -25,8 +25,8 @@ define amdgpu_kernel void @constant_load_i1_align2(i1 addrspace(1)* %out, i1 add
; OPT-NEXT: load i32
; OPT-NEXT: trunc
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_i1_align4(i1 addrspace(1)* %out, i1 addrspace(2)* %in) #0 {
%val = load i1, i1 addrspace(2)* %in, align 4
define amdgpu_kernel void @constant_load_i1_align4(i1 addrspace(1)* %out, i1 addrspace(4)* %in) #0 {
%val = load i1, i1 addrspace(4)* %in, align 4
store i1 %val, i1 addrspace(1)* %out, align 4
ret void
}
@@ -34,8 +34,8 @@ define amdgpu_kernel void @constant_load_i1_align4(i1 addrspace(1)* %out, i1 add
; OPT-LABEL: @constant_load_i8
; OPT: load i8
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_i8(i8 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
%val = load i8, i8 addrspace(2)* %in
define amdgpu_kernel void @constant_load_i8(i8 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
%val = load i8, i8 addrspace(4)* %in
store i8 %val, i8 addrspace(1)* %out
ret void
}
@@ -43,8 +43,8 @@ define amdgpu_kernel void @constant_load_i8(i8 addrspace(1)* %out, i8 addrspace(
; OPT-LABEL: @constant_load_i8_align2
; OPT: load i8
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_i8_align2(i8 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
%val = load i8, i8 addrspace(2)* %in, align 2
define amdgpu_kernel void @constant_load_i8_align2(i8 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
%val = load i8, i8 addrspace(4)* %in, align 2
store i8 %val, i8 addrspace(1)* %out, align 2
ret void
}
@@ -54,8 +54,8 @@ define amdgpu_kernel void @constant_load_i8_align2(i8 addrspace(1)* %out, i8 add
; OPT-NEXT: load i32
; OPT-NEXT: trunc
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_i8align4(i8 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
%val = load i8, i8 addrspace(2)* %in, align 4
define amdgpu_kernel void @constant_load_i8align4(i8 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
%val = load i8, i8 addrspace(4)* %in, align 4
store i8 %val, i8 addrspace(1)* %out, align 4
ret void
}
@@ -64,8 +64,8 @@ define amdgpu_kernel void @constant_load_i8align4(i8 addrspace(1)* %out, i8 addr
; OPT-LABEL: @constant_load_v2i8
; OPT: load <2 x i8>
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
%ld = load <2 x i8>, <2 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
%ld = load <2 x i8>, <2 x i8> addrspace(4)* %in
store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
ret void
}
@@ -76,32 +76,32 @@ define amdgpu_kernel void @constant_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x
; OPT-NEXT: trunc
; OPT-NEXT: bitcast
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_v2i8_align4(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
%ld = load <2 x i8>, <2 x i8> addrspace(2)* %in, align 4
define amdgpu_kernel void @constant_load_v2i8_align4(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
%ld = load <2 x i8>, <2 x i8> addrspace(4)* %in, align 4
store <2 x i8> %ld, <2 x i8> addrspace(1)* %out, align 4
ret void
}
; OPT-LABEL: @constant_load_v3i8
; OPT: bitcast <3 x i8>
; OPT-NEXT: load i32, i32 addrspace(2)
; OPT-NEXT: load i32, i32 addrspace(4)
; OPT-NEXT: trunc i32
; OPT-NEXT: bitcast i24
; OPT-NEXT: store <3 x i8>
define amdgpu_kernel void @constant_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 {
%ld = load <3 x i8>, <3 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
%ld = load <3 x i8>, <3 x i8> addrspace(4)* %in
store <3 x i8> %ld, <3 x i8> addrspace(1)* %out
ret void
}
; OPT-LABEL: @constant_load_v3i8_align4
; OPT: bitcast <3 x i8>
; OPT-NEXT: load i32, i32 addrspace(2)
; OPT-NEXT: load i32, i32 addrspace(4)
; OPT-NEXT: trunc i32
; OPT-NEXT: bitcast i24
; OPT-NEXT: store <3 x i8>
define amdgpu_kernel void @constant_load_v3i8_align4(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 {
%ld = load <3 x i8>, <3 x i8> addrspace(2)* %in, align 4
define amdgpu_kernel void @constant_load_v3i8_align4(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
%ld = load <3 x i8>, <3 x i8> addrspace(4)* %in, align 4
store <3 x i8> %ld, <3 x i8> addrspace(1)* %out, align 4
ret void
}
@@ -110,8 +110,8 @@ define amdgpu_kernel void @constant_load_v3i8_align4(<3 x i8> addrspace(1)* %out
; OPT: load i16
; OPT: sext
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_i16(i32 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
%ld = load i16, i16 addrspace(2)* %in
define amdgpu_kernel void @constant_load_i16(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
%ld = load i16, i16 addrspace(4)* %in
%ext = sext i16 %ld to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
@@ -123,8 +123,8 @@ define amdgpu_kernel void @constant_load_i16(i32 addrspace(1)* %out, i16 addrspa
; OPT-NEXT: trunc
; OPT-NEXT: sext
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_i16_align4(i32 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
%ld = load i16, i16 addrspace(2)* %in, align 4
define amdgpu_kernel void @constant_load_i16_align4(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
%ld = load i16, i16 addrspace(4)* %in, align 4
%ext = sext i16 %ld to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
ret void
@@ -133,8 +133,8 @@ define amdgpu_kernel void @constant_load_i16_align4(i32 addrspace(1)* %out, i16
; OPT-LABEL: @constant_load_f16
; OPT: load half
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_f16(half addrspace(1)* %out, half addrspace(2)* %in) #0 {
%ld = load half, half addrspace(2)* %in
define amdgpu_kernel void @constant_load_f16(half addrspace(1)* %out, half addrspace(4)* %in) #0 {
%ld = load half, half addrspace(4)* %in
store half %ld, half addrspace(1)* %out
ret void
}
@@ -142,8 +142,8 @@ define amdgpu_kernel void @constant_load_f16(half addrspace(1)* %out, half addrs
; OPT-LABEL: @constant_load_v2f16
; OPT: load <2 x half>
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(2)* %in) #0 {
%ld = load <2 x half>, <2 x half> addrspace(2)* %in
define amdgpu_kernel void @constant_load_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(4)* %in) #0 {
%ld = load <2 x half>, <2 x half> addrspace(4)* %in
store <2 x half> %ld, <2 x half> addrspace(1)* %out
ret void
}
@@ -151,8 +151,8 @@ define amdgpu_kernel void @constant_load_v2f16(<2 x half> addrspace(1)* %out, <2
; OPT-LABEL: @load_volatile
; OPT: load volatile i16
; OPT-NEXT: store
define amdgpu_kernel void @load_volatile(i16 addrspace(1)* %out, i16 addrspace(2)* %in) {
%a = load volatile i16, i16 addrspace(2)* %in
define amdgpu_kernel void @load_volatile(i16 addrspace(1)* %out, i16 addrspace(4)* %in) {
%a = load volatile i16, i16 addrspace(4)* %in
store i16 %a, i16 addrspace(1)* %out
ret void
}
@@ -160,8 +160,8 @@ define amdgpu_kernel void @load_volatile(i16 addrspace(1)* %out, i16 addrspace(2
; OPT-LABEL: @constant_load_v2i8_volatile
; OPT: load volatile <2 x i8>
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_v2i8_volatile(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
%ld = load volatile <2 x i8>, <2 x i8> addrspace(2)* %in
define amdgpu_kernel void @constant_load_v2i8_volatile(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
%ld = load volatile <2 x i8>, <2 x i8> addrspace(4)* %in
store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
ret void
}
@@ -182,8 +182,8 @@ define amdgpu_kernel void @constant_load_v2i8_addrspace1(<2 x i8> addrspace(1)*
; OPT-NEXT: zext
; OPT-NEXT: store
define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 {
%dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
%val = load i8, i8 addrspace(2)* %dispatch.ptr, align 4
%dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
%val = load i8, i8 addrspace(4)* %dispatch.ptr, align 4
%ld = zext i8 %val to i32
store i32 %ld, i32 addrspace(1)* %ptr
ret void

@@ -2,64 +2,64 @@
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
@array = internal addrspace(2) constant [4096 x [32 x float]] zeroinitializer, align 4
@array = internal addrspace(4) constant [4096 x [32 x float]] zeroinitializer, align 4
; IR-LABEL: @sum_of_array(
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 1
; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 32
; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 33
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [4096 x [32 x float]], [4096 x [32 x float]] addrspace(4)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
; IR: getelementptr inbounds float, float addrspace(4)* [[BASE_PTR]], i64 1
; IR: getelementptr inbounds float, float addrspace(4)* [[BASE_PTR]], i64 32
; IR: getelementptr inbounds float, float addrspace(4)* [[BASE_PTR]], i64 33
define amdgpu_kernel void @sum_of_array(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
%tmp = sext i32 %y to i64
%tmp1 = sext i32 %x to i64
%tmp2 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp1, i64 %tmp
%tmp4 = load float, float addrspace(2)* %tmp2, align 4
%tmp2 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(4)* @array, i64 0, i64 %tmp1, i64 %tmp
%tmp4 = load float, float addrspace(4)* %tmp2, align 4
%tmp5 = fadd float %tmp4, 0.000000e+00
%tmp6 = add i32 %y, 1
%tmp7 = sext i32 %tmp6 to i64
%tmp8 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp1, i64 %tmp7
%tmp10 = load float, float addrspace(2)* %tmp8, align 4
%tmp8 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(4)* @array, i64 0, i64 %tmp1, i64 %tmp7
%tmp10 = load float, float addrspace(4)* %tmp8, align 4
%tmp11 = fadd float %tmp5, %tmp10
%tmp12 = add i32 %x, 1
%tmp13 = sext i32 %tmp12 to i64
%tmp14 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp13, i64 %tmp
%tmp16 = load float, float addrspace(2)* %tmp14, align 4
%tmp14 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(4)* @array, i64 0, i64 %tmp13, i64 %tmp
%tmp16 = load float, float addrspace(4)* %tmp14, align 4
%tmp17 = fadd float %tmp11, %tmp16
%tmp18 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp13, i64 %tmp7
%tmp20 = load float, float addrspace(2)* %tmp18, align 4
%tmp18 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(4)* @array, i64 0, i64 %tmp13, i64 %tmp7
%tmp20 = load float, float addrspace(4)* %tmp18, align 4
%tmp21 = fadd float %tmp17, %tmp20
store float %tmp21, float addrspace(1)* %output, align 4
ret void
}
@array2 = internal addrspace(2) constant [4096 x [4 x float]] zeroinitializer, align 4
@array2 = internal addrspace(4) constant [4096 x [4 x float]] zeroinitializer, align 4
; Some of the indices go over the maximum mubuf offset, so don't split them.
; IR-LABEL: @sum_of_array_over_max_mubuf_offset(
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 255
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [4096 x [4 x float]], [4096 x [4 x float]] addrspace(4)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
; IR: getelementptr inbounds float, float addrspace(4)* [[BASE_PTR]], i64 255
; IR: add i32 %x, 256
; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(4)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(4)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
define amdgpu_kernel void @sum_of_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
%tmp = sext i32 %y to i64
%tmp1 = sext i32 %x to i64
%tmp2 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp1, i64 %tmp
%tmp4 = load float, float addrspace(2)* %tmp2, align 4
%tmp2 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(4)* @array2, i64 0, i64 %tmp1, i64 %tmp
%tmp4 = load float, float addrspace(4)* %tmp2, align 4
%tmp5 = fadd float %tmp4, 0.000000e+00
%tmp6 = add i32 %y, 255
%tmp7 = sext i32 %tmp6 to i64
%tmp8 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp1, i64 %tmp7
%tmp10 = load float, float addrspace(2)* %tmp8, align 4
%tmp8 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(4)* @array2, i64 0, i64 %tmp1, i64 %tmp7
%tmp10 = load float, float addrspace(4)* %tmp8, align 4
%tmp11 = fadd float %tmp5, %tmp10
%tmp12 = add i32 %x, 256
%tmp13 = sext i32 %tmp12 to i64
%tmp14 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp13, i64 %tmp
%tmp16 = load float, float addrspace(2)* %tmp14, align 4
%tmp14 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(4)* @array2, i64 0, i64 %tmp13, i64 %tmp
%tmp16 = load float, float addrspace(4)* %tmp14, align 4
%tmp17 = fadd float %tmp11, %tmp16
%tmp18 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp13, i64 %tmp7
%tmp20 = load float, float addrspace(2)* %tmp18, align 4
%tmp18 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(4)* @array2, i64 0, i64 %tmp13, i64 %tmp7
%tmp20 = load float, float addrspace(4)* %tmp18, align 4
%tmp21 = fadd float %tmp17, %tmp20
store float %tmp21, float addrspace(1)* %output, align 4
ret void
@@ -97,18 +97,18 @@ define amdgpu_kernel void @sum_of_lds_array_over_max_mubuf_offset(i32 %x, i32 %y
; IR: getelementptr {{.*}} !amdgpu.uniform
; IR: getelementptr {{.*}} !amdgpu.uniform
; IR: getelementptr {{.*}} !amdgpu.uniform
define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @keep_metadata([0 x <4 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <4 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #5 {
define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @keep_metadata([0 x <4 x i32>] addrspace(4)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(4)* inreg noalias dereferenceable(18446744073709551615), [0 x <4 x i32>] addrspace(4)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(4)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #5 {
main_body:
%22 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #8
%23 = bitcast float %22 to i32
%24 = shl i32 %23, 1
%25 = getelementptr [0 x <8 x i32>], [0 x <8 x i32>] addrspace(2)* %1, i32 0, i32 %24, !amdgpu.uniform !0
%26 = load <8 x i32>, <8 x i32> addrspace(2)* %25, align 32, !invariant.load !0
%25 = getelementptr [0 x <8 x i32>], [0 x <8 x i32>] addrspace(4)* %1, i32 0, i32 %24, !amdgpu.uniform !0
%26 = load <8 x i32>, <8 x i32> addrspace(4)* %25, align 32, !invariant.load !0
%27 = shl i32 %23, 2
%28 = or i32 %27, 3
%29 = bitcast [0 x <8 x i32>] addrspace(2)* %1 to [0 x <4 x i32>] addrspace(2)*
%30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %29, i32 0, i32 %28, !amdgpu.uniform !0
%31 = load <4 x i32>, <4 x i32> addrspace(2)* %30, align 16, !invariant.load !0
%29 = bitcast [0 x <8 x i32>] addrspace(4)* %1 to [0 x <4 x i32>] addrspace(4)*
%30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(4)* %29, i32 0, i32 %28, !amdgpu.uniform !0
%31 = load <4 x i32>, <4 x i32> addrspace(4)* %30, align 16, !invariant.load !0
%32 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> zeroinitializer, <8 x i32> %26, <4 x i32> %31, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #8
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1