mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[AMDGPU] Change constant addr space to 4
Differential Revision: https://reviews.llvm.org/D43170 llvm-svn: 325030
This commit is contained in:
parent
3f5aaeaf08
commit
c6e831c09d
@ -270,27 +270,17 @@ LLVM Address Space number is used throughout LLVM (for example, in LLVM IR).
|
||||
.. table:: Address Space Mapping
|
||||
:name: amdgpu-address-space-mapping-table
|
||||
|
||||
================== ================= =================
|
||||
================== =================
|
||||
LLVM Address Space Memory Space
|
||||
------------------ -----------------------------------
|
||||
\ Current Default Future Default
|
||||
================== ================= =================
|
||||
0 Generic (Flat) Generic (Flat)
|
||||
1 Global Global
|
||||
2 Constant Region (GDS)
|
||||
3 Local (group/LDS) Local (group/LDS)
|
||||
4 Region (GDS) Constant
|
||||
5 Private (Scratch) Private (Scratch)
|
||||
6 Constant 32-bit Constant 32-bit
|
||||
================== ================= =================
|
||||
|
||||
Current Default
|
||||
This is the current default address space mapping used for all languages.
|
||||
This will shortly be deprecated.
|
||||
|
||||
Future Default
|
||||
This will shortly be the only address space mapping for all languages using
|
||||
AMDGPU backend.
|
||||
================== =================
|
||||
0 Generic (Flat)
|
||||
1 Global
|
||||
2 Region (GDS)
|
||||
3 Local (group/LDS)
|
||||
4 Constant
|
||||
5 Private (Scratch)
|
||||
6 Constant 32-bit
|
||||
================== =================
|
||||
|
||||
.. _amdgpu-memory-scopes:
|
||||
|
||||
|
@ -83,22 +83,22 @@ defm int_amdgcn_workgroup_id : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
|
||||
|
||||
def int_amdgcn_dispatch_ptr :
|
||||
GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">,
|
||||
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
|
||||
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
|
||||
[IntrNoMem, IntrSpeculatable]>;
|
||||
|
||||
def int_amdgcn_queue_ptr :
|
||||
GCCBuiltin<"__builtin_amdgcn_queue_ptr">,
|
||||
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
|
||||
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
|
||||
[IntrNoMem, IntrSpeculatable]>;
|
||||
|
||||
def int_amdgcn_kernarg_segment_ptr :
|
||||
GCCBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">,
|
||||
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
|
||||
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
|
||||
[IntrNoMem, IntrSpeculatable]>;
|
||||
|
||||
def int_amdgcn_implicitarg_ptr :
|
||||
GCCBuiltin<"__builtin_amdgcn_implicitarg_ptr">,
|
||||
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
|
||||
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
|
||||
[IntrNoMem, IntrSpeculatable]>;
|
||||
|
||||
def int_amdgcn_groupstaticsize :
|
||||
@ -111,7 +111,7 @@ def int_amdgcn_dispatch_id :
|
||||
|
||||
def int_amdgcn_implicit_buffer_ptr :
|
||||
GCCBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">,
|
||||
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
|
||||
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
|
||||
[IntrNoMem, IntrSpeculatable]>;
|
||||
|
||||
// Set EXEC to the 64-bit value given.
|
||||
|
@ -222,7 +222,7 @@ struct AMDGPUAS {
|
||||
MAX_COMMON_ADDRESS = 5,
|
||||
|
||||
GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
|
||||
CONSTANT_ADDRESS = 2, ///< Address space for constant memory (VTX2)
|
||||
CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2)
|
||||
LOCAL_ADDRESS = 3, ///< Address space for local memory.
|
||||
|
||||
CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory
|
||||
|
@ -61,7 +61,7 @@ AMDGPUAAResult::ASAliasRulesTy::ASAliasRulesTy(AMDGPUAS AS_, Triple::ArchType Ar
|
||||
/* Region */ {NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, MayAlias}
|
||||
};
|
||||
static const AliasResult ASAliasRulesGenIsZero[6][6] = {
|
||||
/* Flat Global Constant Group Region Private */
|
||||
/* Flat Global Region Group Constant Private */
|
||||
/* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
|
||||
/* Global */ {MayAlias, MayAlias, NoAlias , NoAlias , NoAlias , NoAlias},
|
||||
/* Constant */ {MayAlias, NoAlias , MayAlias, NoAlias , NoAlias, NoAlias},
|
||||
@ -72,9 +72,9 @@ AMDGPUAAResult::ASAliasRulesTy::ASAliasRulesTy(AMDGPUAS AS_, Triple::ArchType Ar
|
||||
assert(AS.MAX_COMMON_ADDRESS <= 5);
|
||||
if (AS.FLAT_ADDRESS == 0) {
|
||||
assert(AS.GLOBAL_ADDRESS == 1 &&
|
||||
AS.REGION_ADDRESS == 4 &&
|
||||
AS.REGION_ADDRESS == 2 &&
|
||||
AS.LOCAL_ADDRESS == 3 &&
|
||||
AS.CONSTANT_ADDRESS == 2 &&
|
||||
AS.CONSTANT_ADDRESS == 4 &&
|
||||
AS.PRIVATE_ADDRESS == 5);
|
||||
ASAliasRules = &ASAliasRulesGenIsZero;
|
||||
} else {
|
||||
|
@ -116,7 +116,7 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
|
||||
|
||||
if (Info->hasKernargSegmentPtr()) {
|
||||
unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
|
||||
const LLT P2 = LLT::pointer(2, 64);
|
||||
const LLT P2 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
|
||||
unsigned VReg = MRI.createGenericVirtualRegister(P2);
|
||||
MRI.addLiveIn(InputPtrReg, VReg);
|
||||
MIRBuilder.getMBB().addLiveIn(InputPtrReg);
|
||||
|
@ -12,6 +12,7 @@
|
||||
/// \todo This should be generated by TableGen.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "AMDGPULegalizerInfo.h"
|
||||
#include "llvm/CodeGen/TargetOpcodes.h"
|
||||
#include "llvm/CodeGen/ValueTypes.h"
|
||||
@ -29,8 +30,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo() {
|
||||
const LLT V2S16 = LLT::vector(2, 16);
|
||||
const LLT S32 = LLT::scalar(32);
|
||||
const LLT S64 = LLT::scalar(64);
|
||||
const LLT P1 = LLT::pointer(1, 64);
|
||||
const LLT P2 = LLT::pointer(2, 64);
|
||||
const LLT P1 = LLT::pointer(AMDGPUAS::GLOBAL_ADDRESS, 64);
|
||||
const LLT P2 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
|
||||
|
||||
setAction({G_ADD, S32}, Legal);
|
||||
setAction({G_AND, S32}, Legal);
|
||||
|
@ -266,7 +266,7 @@ static StringRef computeDataLayout(const Triple &TT) {
|
||||
|
||||
// 32-bit private, local, and region pointers. 64-bit global, constant and
|
||||
// flat.
|
||||
return "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-p6:32:32"
|
||||
return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
|
||||
"-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
|
||||
"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5";
|
||||
}
|
||||
|
@ -929,7 +929,7 @@ AMDGPUAS getAMDGPUAS(Triple T) {
|
||||
AMDGPUAS AS;
|
||||
AS.FLAT_ADDRESS = 0;
|
||||
AS.PRIVATE_ADDRESS = 5;
|
||||
AS.REGION_ADDRESS = 4;
|
||||
AS.REGION_ADDRESS = 2;
|
||||
return AS;
|
||||
}
|
||||
|
||||
|
@ -5,7 +5,7 @@
|
||||
# REQUIRES: global-isel
|
||||
|
||||
--- |
|
||||
define amdgpu_kernel void @smrd_imm(i32 addrspace(2)* %const0) { ret void }
|
||||
define amdgpu_kernel void @smrd_imm(i32 addrspace(4)* %const0) { ret void }
|
||||
...
|
||||
---
|
||||
|
||||
@ -91,50 +91,50 @@ body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1
|
||||
|
||||
%0:sgpr(p2) = COPY $sgpr0_sgpr1
|
||||
%0:sgpr(p4) = COPY $sgpr0_sgpr1
|
||||
|
||||
%1:sgpr(s64) = G_CONSTANT i64 4
|
||||
%2:sgpr(p2) = G_GEP %0, %1
|
||||
%2:sgpr(p4) = G_GEP %0, %1
|
||||
%3:sgpr(s32) = G_LOAD %2 :: (load 4 from %ir.const0)
|
||||
$sgpr0 = COPY %3
|
||||
|
||||
%4:sgpr(s64) = G_CONSTANT i64 1020
|
||||
%5:sgpr(p2) = G_GEP %0, %4
|
||||
%5:sgpr(p4) = G_GEP %0, %4
|
||||
%6:sgpr(s32) = G_LOAD %5 :: (load 4 from %ir.const0)
|
||||
$sgpr0 = COPY %6
|
||||
|
||||
%7:sgpr(s64) = G_CONSTANT i64 1024
|
||||
%8:sgpr(p2) = G_GEP %0, %7
|
||||
%8:sgpr(p4) = G_GEP %0, %7
|
||||
%9:sgpr(s32) = G_LOAD %8 :: (load 4 from %ir.const0)
|
||||
$sgpr0 = COPY %9
|
||||
|
||||
%10:sgpr(s64) = G_CONSTANT i64 1048572
|
||||
%11:sgpr(p2) = G_GEP %0, %10
|
||||
%11:sgpr(p4) = G_GEP %0, %10
|
||||
%12:sgpr(s32) = G_LOAD %11 :: (load 4 from %ir.const0)
|
||||
$sgpr0 = COPY %12
|
||||
|
||||
%13:sgpr(s64) = G_CONSTANT i64 1048576
|
||||
%14:sgpr(p2) = G_GEP %0, %13
|
||||
%14:sgpr(p4) = G_GEP %0, %13
|
||||
%15:sgpr(s32) = G_LOAD %14 :: (load 4 from %ir.const0)
|
||||
$sgpr0 = COPY %15
|
||||
|
||||
%16:sgpr(s64) = G_CONSTANT i64 17179869180
|
||||
%17:sgpr(p2) = G_GEP %0, %16
|
||||
%17:sgpr(p4) = G_GEP %0, %16
|
||||
%18:sgpr(s32) = G_LOAD %17 :: (load 4 from %ir.const0)
|
||||
$sgpr0 = COPY %18
|
||||
|
||||
%19:sgpr(s64) = G_CONSTANT i64 17179869184
|
||||
%20:sgpr(p2) = G_GEP %0, %19
|
||||
%20:sgpr(p4) = G_GEP %0, %19
|
||||
%21:sgpr(s32) = G_LOAD %20 :: (load 4 from %ir.const0)
|
||||
$sgpr0 = COPY %21
|
||||
|
||||
%22:sgpr(s64) = G_CONSTANT i64 4294967292
|
||||
%23:sgpr(p2) = G_GEP %0, %22
|
||||
%23:sgpr(p4) = G_GEP %0, %22
|
||||
%24:sgpr(s32) = G_LOAD %23 :: (load 4 from %ir.const0)
|
||||
$sgpr0 = COPY %24
|
||||
|
||||
%25:sgpr(s64) = G_CONSTANT i64 4294967296
|
||||
%26:sgpr(p2) = G_GEP %0, %25
|
||||
%26:sgpr(p4) = G_GEP %0, %25
|
||||
%27:sgpr(s32) = G_LOAD %26 :: (load 4 from %ir.const0)
|
||||
$sgpr0 = COPY %27
|
||||
|
||||
|
@ -18,28 +18,28 @@ define amdgpu_vs void @test_f32(float %arg0) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: name: test_ptr2_byval
|
||||
; CHECK: [[S01:%[0-9]+]]:_(p2) = COPY $sgpr0_sgpr1
|
||||
; CHECK: [[S01:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
|
||||
; CHECK: G_LOAD [[S01]]
|
||||
define amdgpu_vs void @test_ptr2_byval(i32 addrspace(2)* byval %arg0) {
|
||||
%tmp0 = load volatile i32, i32 addrspace(2)* %arg0
|
||||
define amdgpu_vs void @test_ptr2_byval(i32 addrspace(4)* byval %arg0) {
|
||||
%tmp0 = load volatile i32, i32 addrspace(4)* %arg0
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: name: test_ptr2_inreg
|
||||
; CHECK: [[S01:%[0-9]+]]:_(p2) = COPY $sgpr0_sgpr1
|
||||
; CHECK: [[S01:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
|
||||
; CHECK: G_LOAD [[S01]]
|
||||
define amdgpu_vs void @test_ptr2_inreg(i32 addrspace(2)* inreg %arg0) {
|
||||
%tmp0 = load volatile i32, i32 addrspace(2)* %arg0
|
||||
define amdgpu_vs void @test_ptr2_inreg(i32 addrspace(4)* inreg %arg0) {
|
||||
%tmp0 = load volatile i32, i32 addrspace(4)* %arg0
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: name: test_sgpr_alignment0
|
||||
; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY $sgpr0
|
||||
; CHECK: [[S23:%[0-9]+]]:_(p2) = COPY $sgpr2_sgpr3
|
||||
; CHECK: [[S23:%[0-9]+]]:_(p4) = COPY $sgpr2_sgpr3
|
||||
; CHECK: G_LOAD [[S23]]
|
||||
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[S0]]
|
||||
define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, i32 addrspace(2)* inreg %arg1) {
|
||||
%tmp0 = load volatile i32, i32 addrspace(2)* %arg1
|
||||
define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, i32 addrspace(4)* inreg %arg1) {
|
||||
%tmp0 = load volatile i32, i32 addrspace(4)* %arg1
|
||||
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0
|
||||
ret void
|
||||
}
|
||||
|
@ -3,7 +3,7 @@
|
||||
# REQUIRES: global-isel
|
||||
|
||||
--- |
|
||||
define amdgpu_kernel void @load_constant(i32 addrspace(2)* %ptr0) { ret void }
|
||||
define amdgpu_kernel void @load_constant(i32 addrspace(4)* %ptr0) { ret void }
|
||||
define amdgpu_kernel void @load_global_uniform(i32 addrspace(1)* %ptr1) {
|
||||
%tmp0 = load i32, i32 addrspace(1)* %ptr1
|
||||
ret void
|
||||
@ -30,7 +30,7 @@ legalized: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1
|
||||
%0:_(p2) = COPY $sgpr0_sgpr1
|
||||
%0:_(p4) = COPY $sgpr0_sgpr1
|
||||
%1:_(s32) = G_LOAD %0 :: (load 4 from %ir.ptr0)
|
||||
...
|
||||
|
||||
|
@ -9,10 +9,10 @@
|
||||
; GCN-LABEL: {{^}}smrd0:
|
||||
; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
|
||||
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4
|
||||
define amdgpu_kernel void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
|
||||
define amdgpu_kernel void @smrd0(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) {
|
||||
entry:
|
||||
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 1
|
||||
%1 = load i32, i32 addrspace(2)* %0
|
||||
%0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 1
|
||||
%1 = load i32, i32 addrspace(4)* %0
|
||||
store i32 %1, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -21,10 +21,10 @@ entry:
|
||||
; GCN-LABEL: {{^}}smrd1:
|
||||
; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff,0x{{[0-9]+[137]}}
|
||||
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
|
||||
define amdgpu_kernel void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
|
||||
define amdgpu_kernel void @smrd1(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) {
|
||||
entry:
|
||||
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 255
|
||||
%1 = load i32, i32 addrspace(2)* %0
|
||||
%0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 255
|
||||
%1 = load i32, i32 addrspace(4)* %0
|
||||
store i32 %1, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -36,10 +36,10 @@ entry:
|
||||
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
|
||||
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
|
||||
define amdgpu_kernel void @smrd2(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) {
|
||||
entry:
|
||||
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 256
|
||||
%1 = load i32, i32 addrspace(2)* %0
|
||||
%0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 256
|
||||
%1 = load i32, i32 addrspace(4)* %0
|
||||
store i32 %1, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -51,10 +51,10 @@ entry:
|
||||
; XSI: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0xb ; encoding: [0x0b
|
||||
; TODO: Add VI checks
|
||||
; XGCN: s_endpgm
|
||||
define amdgpu_kernel void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
|
||||
define amdgpu_kernel void @smrd3(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) {
|
||||
entry:
|
||||
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32
|
||||
%1 = load i32, i32 addrspace(2)* %0
|
||||
%0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 4294967296 ; 2 ^ 32
|
||||
%1 = load i32, i32 addrspace(4)* %0
|
||||
store i32 %1, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -65,10 +65,10 @@ entry:
|
||||
; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
|
||||
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
|
||||
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
|
||||
define amdgpu_kernel void @smrd4(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
|
||||
define amdgpu_kernel void @smrd4(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) {
|
||||
entry:
|
||||
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262143
|
||||
%1 = load i32, i32 addrspace(2)* %0
|
||||
%0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 262143
|
||||
%1 = load i32, i32 addrspace(4)* %0
|
||||
store i32 %1, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -79,10 +79,10 @@ entry:
|
||||
; SIVI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
|
||||
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @smrd5(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
|
||||
define amdgpu_kernel void @smrd5(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) {
|
||||
entry:
|
||||
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262144
|
||||
%1 = load i32, i32 addrspace(2)* %0
|
||||
%0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 262144
|
||||
%1 = load i32, i32 addrspace(4)* %0
|
||||
store i32 %1, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
@ -27,9 +27,9 @@ define amdgpu_kernel void @v_test_add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i
|
||||
|
||||
; VI: s_add_i32
|
||||
; VI: s_add_i32
|
||||
define amdgpu_kernel void @s_test_add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %in0, <2 x i16> addrspace(2)* %in1) #1 {
|
||||
%a = load <2 x i16>, <2 x i16> addrspace(2)* %in0
|
||||
%b = load <2 x i16>, <2 x i16> addrspace(2)* %in1
|
||||
define amdgpu_kernel void @s_test_add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %in0, <2 x i16> addrspace(4)* %in1) #1 {
|
||||
%a = load <2 x i16>, <2 x i16> addrspace(4)* %in0
|
||||
%b = load <2 x i16>, <2 x i16> addrspace(4)* %in1
|
||||
%add = add <2 x i16> %a, %b
|
||||
store <2 x i16> %add, <2 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
@ -41,8 +41,8 @@ define amdgpu_kernel void @s_test_add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i
|
||||
|
||||
; VI: s_add_i32
|
||||
; VI: s_add_i32
|
||||
define amdgpu_kernel void @s_test_add_self_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %in0) #1 {
|
||||
%a = load <2 x i16>, <2 x i16> addrspace(2)* %in0
|
||||
define amdgpu_kernel void @s_test_add_self_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %in0) #1 {
|
||||
%a = load <2 x i16>, <2 x i16> addrspace(4)* %in0
|
||||
%add = add <2 x i16> %a, %a
|
||||
store <2 x i16> %add, <2 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
|
@ -100,8 +100,8 @@ define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %p
|
||||
; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
|
||||
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
|
||||
; HSA: flat_load_dword v{{[0-9]+}}, v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}
|
||||
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #0 {
|
||||
%stof = addrspacecast i32 addrspace(2)* %ptr to i32*
|
||||
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #0 {
|
||||
%stof = addrspacecast i32 addrspace(4)* %ptr to i32*
|
||||
%ld = load volatile i32, i32* %stof
|
||||
ret void
|
||||
}
|
||||
@ -160,8 +160,8 @@ define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #0 {
|
||||
; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}, s[4:5], 0x0
|
||||
; HSA: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}}, 0x0
|
||||
define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #0 {
|
||||
%ftos = addrspacecast i32* %ptr to i32 addrspace(2)*
|
||||
load volatile i32, i32 addrspace(2)* %ftos
|
||||
%ftos = addrspacecast i32* %ptr to i32 addrspace(4)*
|
||||
load volatile i32, i32 addrspace(4)* %ftos
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -4,9 +4,9 @@
|
||||
; This test just checks that the compiler doesn't crash.
|
||||
|
||||
; FUNC-LABEL: {{^}}v32i8_to_v8i32:
|
||||
define amdgpu_ps float @v32i8_to_v8i32(<32 x i8> addrspace(2)* inreg) #0 {
|
||||
define amdgpu_ps float @v32i8_to_v8i32(<32 x i8> addrspace(4)* inreg) #0 {
|
||||
entry:
|
||||
%1 = load <32 x i8>, <32 x i8> addrspace(2)* %0
|
||||
%1 = load <32 x i8>, <32 x i8> addrspace(4)* %0
|
||||
%2 = bitcast <32 x i8> %1 to <8 x i32>
|
||||
%3 = extractelement <8 x i32> %2, i32 1
|
||||
%4 = icmp ne i32 %3, 0
|
||||
|
@ -48,12 +48,12 @@
|
||||
; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0
|
||||
|
||||
|
||||
; HSAOPT: [[DISPATCH_PTR:%[0-9]+]] = call noalias nonnull dereferenceable(64) i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
|
||||
; HSAOPT: [[CAST_DISPATCH_PTR:%[0-9]+]] = bitcast i8 addrspace(2)* [[DISPATCH_PTR]] to i32 addrspace(2)*
|
||||
; HSAOPT: [[GEP0:%[0-9]+]] = getelementptr inbounds i32, i32 addrspace(2)* [[CAST_DISPATCH_PTR]], i64 1
|
||||
; HSAOPT: [[LDXY:%[0-9]+]] = load i32, i32 addrspace(2)* [[GEP0]], align 4, !invariant.load !0
|
||||
; HSAOPT: [[GEP1:%[0-9]+]] = getelementptr inbounds i32, i32 addrspace(2)* [[CAST_DISPATCH_PTR]], i64 2
|
||||
; HSAOPT: [[LDZU:%[0-9]+]] = load i32, i32 addrspace(2)* [[GEP1]], align 4, !range !1, !invariant.load !0
|
||||
; HSAOPT: [[DISPATCH_PTR:%[0-9]+]] = call noalias nonnull dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
|
||||
; HSAOPT: [[CAST_DISPATCH_PTR:%[0-9]+]] = bitcast i8 addrspace(4)* [[DISPATCH_PTR]] to i32 addrspace(4)*
|
||||
; HSAOPT: [[GEP0:%[0-9]+]] = getelementptr inbounds i32, i32 addrspace(4)* [[CAST_DISPATCH_PTR]], i64 1
|
||||
; HSAOPT: [[LDXY:%[0-9]+]] = load i32, i32 addrspace(4)* [[GEP0]], align 4, !invariant.load !0
|
||||
; HSAOPT: [[GEP1:%[0-9]+]] = getelementptr inbounds i32, i32 addrspace(4)* [[CAST_DISPATCH_PTR]], i64 2
|
||||
; HSAOPT: [[LDZU:%[0-9]+]] = load i32, i32 addrspace(4)* [[GEP1]], align 4, !range !1, !invariant.load !0
|
||||
; HSAOPT: [[EXTRACTY:%[0-9]+]] = lshr i32 [[LDXY]], 16
|
||||
|
||||
; HSAOPT: [[WORKITEM_ID_X:%[0-9]+]] = call i32 @llvm.amdgcn.workitem.id.x(), !range !2
|
||||
|
@ -8,10 +8,10 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
declare i32 @llvm.amdgcn.workitem.id.y() #0
|
||||
declare i32 @llvm.amdgcn.workitem.id.z() #0
|
||||
|
||||
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
|
||||
declare i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
|
||||
declare i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
|
||||
declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() #0
|
||||
declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
|
||||
declare i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
|
||||
declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
|
||||
declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
|
||||
declare i64 @llvm.amdgcn.dispatch.id() #0
|
||||
|
||||
; HSA: define void @use_workitem_id_x() #1 {
|
||||
@ -58,15 +58,15 @@ define void @use_workgroup_id_z() #1 {
|
||||
|
||||
; HSA: define void @use_dispatch_ptr() #7 {
|
||||
define void @use_dispatch_ptr() #1 {
|
||||
%dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
|
||||
store volatile i8 addrspace(2)* %dispatch.ptr, i8 addrspace(2)* addrspace(1)* undef
|
||||
%dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
|
||||
store volatile i8 addrspace(4)* %dispatch.ptr, i8 addrspace(4)* addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; HSA: define void @use_queue_ptr() #8 {
|
||||
define void @use_queue_ptr() #1 {
|
||||
%queue.ptr = call i8 addrspace(2)* @llvm.amdgcn.queue.ptr()
|
||||
store volatile i8 addrspace(2)* %queue.ptr, i8 addrspace(2)* addrspace(1)* undef
|
||||
%queue.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
|
||||
store volatile i8 addrspace(4)* %queue.ptr, i8 addrspace(4)* addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -186,22 +186,22 @@ define void @call_recursive_use_workitem_id_y() #1 {
|
||||
|
||||
; HSA: define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #8 {
|
||||
define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 {
|
||||
%stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
|
||||
store volatile i32 0, i32 addrspace(4)* %stof
|
||||
%stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(2)*
|
||||
store volatile i32 0, i32 addrspace(2)* %stof
|
||||
ret void
|
||||
}
|
||||
|
||||
; HSA: define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #12 {
|
||||
define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #2 {
|
||||
%stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
|
||||
store volatile i32 0, i32 addrspace(4)* %stof
|
||||
%stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(2)*
|
||||
store volatile i32 0, i32 addrspace(2)* %stof
|
||||
ret void
|
||||
}
|
||||
|
||||
; HSA: define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* %ptr) #13 {
|
||||
define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* %ptr) #2 {
|
||||
%stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
|
||||
store volatile i32 0, i32 addrspace(4)* %stof
|
||||
%stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(2)*
|
||||
store volatile i32 0, i32 addrspace(2)* %stof
|
||||
call void @func_indirect_use_queue_ptr()
|
||||
ret void
|
||||
}
|
||||
@ -226,8 +226,8 @@ define void @indirect_use_group_to_flat_addrspacecast_queue_ptr_gfx9() #1 {
|
||||
|
||||
; HSA: define void @use_kernarg_segment_ptr() #14 {
|
||||
define void @use_kernarg_segment_ptr() #1 {
|
||||
%kernarg.segment.ptr = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
|
||||
store volatile i8 addrspace(2)* %kernarg.segment.ptr, i8 addrspace(2)* addrspace(1)* undef
|
||||
%kernarg.segment.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
||||
store volatile i8 addrspace(4)* %kernarg.segment.ptr, i8 addrspace(4)* addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -239,15 +239,15 @@ define void @func_indirect_use_kernarg_segment_ptr() #1 {
|
||||
|
||||
; HSA: define amdgpu_kernel void @kern_use_implicitarg_ptr() #15 {
|
||||
define amdgpu_kernel void @kern_use_implicitarg_ptr() #1 {
|
||||
%implicitarg.ptr = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
|
||||
store volatile i8 addrspace(2)* %implicitarg.ptr, i8 addrspace(2)* addrspace(1)* undef
|
||||
%implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
store volatile i8 addrspace(4)* %implicitarg.ptr, i8 addrspace(4)* addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; HSA: define void @use_implicitarg_ptr() #15 {
|
||||
define void @use_implicitarg_ptr() #1 {
|
||||
%implicitarg.ptr = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
|
||||
store volatile i8 addrspace(2)* %implicitarg.ptr, i8 addrspace(2)* addrspace(1)* undef
|
||||
%implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
store volatile i8 addrspace(4)* %implicitarg.ptr, i8 addrspace(4)* addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -8,9 +8,9 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
declare i32 @llvm.amdgcn.workitem.id.y() #0
|
||||
declare i32 @llvm.amdgcn.workitem.id.z() #0
|
||||
|
||||
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
|
||||
declare i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
|
||||
declare i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
|
||||
declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
|
||||
declare i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
|
||||
declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
|
||||
|
||||
; HSA: define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
|
||||
define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
|
||||
@ -149,27 +149,27 @@ define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #1 {
|
||||
|
||||
; HSA: define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #10 {
|
||||
define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 {
|
||||
%dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
|
||||
%bc = bitcast i8 addrspace(2)* %dispatch.ptr to i32 addrspace(2)*
|
||||
%val = load i32, i32 addrspace(2)* %bc
|
||||
%dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
|
||||
%bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
|
||||
%val = load i32, i32 addrspace(4)* %bc
|
||||
store i32 %val, i32 addrspace(1)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; HSA: define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #11 {
|
||||
define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #1 {
|
||||
%dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.queue.ptr()
|
||||
%bc = bitcast i8 addrspace(2)* %dispatch.ptr to i32 addrspace(2)*
|
||||
%val = load i32, i32 addrspace(2)* %bc
|
||||
%dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
|
||||
%bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
|
||||
%val = load i32, i32 addrspace(4)* %bc
|
||||
store i32 %val, i32 addrspace(1)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; HSA: define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #12 {
|
||||
define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #1 {
|
||||
%dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
|
||||
%bc = bitcast i8 addrspace(2)* %dispatch.ptr to i32 addrspace(2)*
|
||||
%val = load i32, i32 addrspace(2)* %bc
|
||||
%dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
||||
%bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
|
||||
%val = load i32, i32 addrspace(4)* %bc
|
||||
store i32 %val, i32 addrspace(1)* %ptr
|
||||
ret void
|
||||
}
|
||||
@ -210,9 +210,9 @@ define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %p
|
||||
ret void
|
||||
}
|
||||
|
||||
; HSA: define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #1 {
|
||||
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #1 {
|
||||
%stof = addrspacecast i32 addrspace(2)* %ptr to i32*
|
||||
; HSA: define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #1 {
|
||||
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #1 {
|
||||
%stof = addrspacecast i32 addrspace(4)* %ptr to i32*
|
||||
%ld = load volatile i32, i32* %stof
|
||||
ret void
|
||||
}
|
||||
@ -226,8 +226,8 @@ define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 {
|
||||
|
||||
; HSA: define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 {
|
||||
define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 {
|
||||
%ftos = addrspacecast i32* %ptr to i32 addrspace(2)*
|
||||
%ld = load volatile i32, i32 addrspace(2)* %ftos
|
||||
%ftos = addrspacecast i32* %ptr to i32 addrspace(4)*
|
||||
%ld = load volatile i32, i32 addrspace(4)* %ftos
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -358,7 +358,7 @@ bb0:
|
||||
br i1 %cmp0, label %bb2, label %bb1
|
||||
|
||||
bb1:
|
||||
%val = load volatile i32, i32 addrspace(2)* undef
|
||||
%val = load volatile i32, i32 addrspace(4)* undef
|
||||
%cmp1 = icmp eq i32 %val, 3
|
||||
br i1 %cmp1, label %bb3, label %bb2
|
||||
|
||||
|
@ -345,7 +345,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
|
||||
; GCN: s_waitcnt
|
||||
; GCN-NEXT: s_swappc_b64
|
||||
define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
|
||||
%ptr = load <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(2)* undef
|
||||
%ptr = load <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef
|
||||
%val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr
|
||||
call void @external_void_func_v8i32(<8 x i32> %val)
|
||||
ret void
|
||||
@ -359,7 +359,7 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
|
||||
; GCN: s_waitcnt
|
||||
; GCN-NEXT: s_swappc_b64
|
||||
define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
|
||||
%ptr = load <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(2)* undef
|
||||
%ptr = load <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef
|
||||
%val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr
|
||||
call void @external_void_func_v16i32(<16 x i32> %val)
|
||||
ret void
|
||||
@ -377,7 +377,7 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
|
||||
; GCN: s_waitcnt
|
||||
; GCN-NEXT: s_swappc_b64
|
||||
define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
|
||||
%ptr = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(2)* undef
|
||||
%ptr = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
|
||||
%val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr
|
||||
call void @external_void_func_v32i32(<32 x i32> %val)
|
||||
ret void
|
||||
@ -405,7 +405,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
|
||||
; GCN: s_swappc_b64
|
||||
; GCN-NEXT: s_endpgm
|
||||
define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
|
||||
%ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(2)* undef
|
||||
%ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
|
||||
%val0 = load <32 x i32>, <32 x i32> addrspace(1)* %ptr0
|
||||
%val1 = load i32, i32 addrspace(1)* undef
|
||||
call void @external_void_func_v32i32_i32(<32 x i32> %val0, i32 %val1)
|
||||
@ -430,7 +430,7 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(i32 addrspace(1)*
|
||||
; GCN: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: s_swappc_b64
|
||||
define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
|
||||
%ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(2)* undef
|
||||
%ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef
|
||||
%val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0
|
||||
call void @external_void_func_struct_i8_i32({ i8, i32 } %val)
|
||||
ret void
|
||||
@ -516,7 +516,7 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval
|
||||
|
||||
; GCN-LABEL: {{^}}test_call_external_void_func_v16i8:
|
||||
define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
|
||||
%ptr = load <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(2)* undef
|
||||
%ptr = load <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef
|
||||
%val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
|
||||
call void @external_void_func_v16i8(<16 x i8> %val)
|
||||
ret void
|
||||
|
@ -4,9 +4,9 @@
|
||||
; GCN-LABEL: {{^}}use_dispatch_ptr:
|
||||
; GCN: s_load_dword s{{[0-9]+}}, s[6:7], 0x0
|
||||
define void @use_dispatch_ptr() #1 {
|
||||
%dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
|
||||
%header_ptr = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
|
||||
%value = load volatile i32, i32 addrspace(2)* %header_ptr
|
||||
%dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
|
||||
%header_ptr = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
|
||||
%value = load volatile i32, i32 addrspace(4)* %header_ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -21,9 +21,9 @@ define amdgpu_kernel void @kern_indirect_use_dispatch_ptr(i32) #1 {
|
||||
; GCN-LABEL: {{^}}use_queue_ptr:
|
||||
; GCN: s_load_dword s{{[0-9]+}}, s[6:7], 0x0
|
||||
define void @use_queue_ptr() #1 {
|
||||
%queue_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
|
||||
%header_ptr = bitcast i8 addrspace(2)* %queue_ptr to i32 addrspace(2)*
|
||||
%value = load volatile i32, i32 addrspace(2)* %header_ptr
|
||||
%queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
|
||||
%header_ptr = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
|
||||
%value = load volatile i32, i32 addrspace(4)* %header_ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -62,9 +62,9 @@ define amdgpu_kernel void @kern_indirect_use_queue_ptr_addrspacecast(i32) #1 {
|
||||
; GCN-LABEL: {{^}}use_kernarg_segment_ptr:
|
||||
; GCN: s_load_dword s{{[0-9]+}}, s[6:7], 0x0
|
||||
define void @use_kernarg_segment_ptr() #1 {
|
||||
%kernarg_segment_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
|
||||
%header_ptr = bitcast i8 addrspace(2)* %kernarg_segment_ptr to i32 addrspace(2)*
|
||||
%value = load volatile i32, i32 addrspace(2)* %header_ptr
|
||||
%kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
|
||||
%header_ptr = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
|
||||
%value = load volatile i32, i32 addrspace(4)* %header_ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -435,17 +435,17 @@ define void @use_every_sgpr_input() #1 {
|
||||
%alloca = alloca i32, align 4, addrspace(5)
|
||||
store volatile i32 0, i32 addrspace(5)* %alloca
|
||||
|
||||
%dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
|
||||
%dispatch_ptr.bc = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
|
||||
%val0 = load volatile i32, i32 addrspace(2)* %dispatch_ptr.bc
|
||||
%dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
|
||||
%dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
|
||||
%val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc
|
||||
|
||||
%queue_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
|
||||
%queue_ptr.bc = bitcast i8 addrspace(2)* %queue_ptr to i32 addrspace(2)*
|
||||
%val1 = load volatile i32, i32 addrspace(2)* %queue_ptr.bc
|
||||
%queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
|
||||
%queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
|
||||
%val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc
|
||||
|
||||
%kernarg_segment_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
|
||||
%kernarg_segment_ptr.bc = bitcast i8 addrspace(2)* %kernarg_segment_ptr to i32 addrspace(2)*
|
||||
%val2 = load volatile i32, i32 addrspace(2)* %kernarg_segment_ptr.bc
|
||||
%kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
|
||||
%kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
|
||||
%val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc
|
||||
|
||||
%val3 = call i64 @llvm.amdgcn.dispatch.id()
|
||||
call void asm sideeffect "; use $0", "s"(i64 %val3)
|
||||
@ -515,17 +515,17 @@ define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
|
||||
%alloca = alloca i32, align 4, addrspace(5)
|
||||
store volatile i32 0, i32 addrspace(5)* %alloca
|
||||
|
||||
%dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
|
||||
%dispatch_ptr.bc = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
|
||||
%val0 = load volatile i32, i32 addrspace(2)* %dispatch_ptr.bc
|
||||
%dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
|
||||
%dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
|
||||
%val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc
|
||||
|
||||
%queue_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
|
||||
%queue_ptr.bc = bitcast i8 addrspace(2)* %queue_ptr to i32 addrspace(2)*
|
||||
%val1 = load volatile i32, i32 addrspace(2)* %queue_ptr.bc
|
||||
%queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
|
||||
%queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
|
||||
%val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc
|
||||
|
||||
%kernarg_segment_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
|
||||
%kernarg_segment_ptr.bc = bitcast i8 addrspace(2)* %kernarg_segment_ptr to i32 addrspace(2)*
|
||||
%val2 = load volatile i32, i32 addrspace(2)* %kernarg_segment_ptr.bc
|
||||
%kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
|
||||
%kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
|
||||
%val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc
|
||||
|
||||
%val3 = call i64 @llvm.amdgcn.dispatch.id()
|
||||
call void asm sideeffect "; use $0", "s"(i64 %val3)
|
||||
@ -573,17 +573,17 @@ define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill() #1 {
|
||||
|
||||
store volatile i32 0, i32 addrspace(5)* %alloca
|
||||
|
||||
%dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
|
||||
%dispatch_ptr.bc = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
|
||||
%val0 = load volatile i32, i32 addrspace(2)* %dispatch_ptr.bc
|
||||
%dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
|
||||
%dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
|
||||
%val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc
|
||||
|
||||
%queue_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
|
||||
%queue_ptr.bc = bitcast i8 addrspace(2)* %queue_ptr to i32 addrspace(2)*
|
||||
%val1 = load volatile i32, i32 addrspace(2)* %queue_ptr.bc
|
||||
%queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
|
||||
%queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
|
||||
%val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc
|
||||
|
||||
%kernarg_segment_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
|
||||
%kernarg_segment_ptr.bc = bitcast i8 addrspace(2)* %kernarg_segment_ptr to i32 addrspace(2)*
|
||||
%val2 = load volatile i32, i32 addrspace(2)* %kernarg_segment_ptr.bc
|
||||
%kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
|
||||
%kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
|
||||
%val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc
|
||||
|
||||
%val3 = call i64 @llvm.amdgcn.dispatch.id()
|
||||
call void asm sideeffect "; use $0", "s"(i64 %val3)
|
||||
@ -603,10 +603,10 @@ define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill() #1 {
|
||||
declare i32 @llvm.amdgcn.workgroup.id.x() #0
|
||||
declare i32 @llvm.amdgcn.workgroup.id.y() #0
|
||||
declare i32 @llvm.amdgcn.workgroup.id.z() #0
|
||||
declare noalias i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
|
||||
declare noalias i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
|
||||
declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
|
||||
declare noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
|
||||
declare i64 @llvm.amdgcn.dispatch.id() #0
|
||||
declare noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
|
||||
declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
|
||||
|
||||
attributes #0 = { nounwind readnone speculatable }
|
||||
attributes #1 = { nounwind noinline }
|
||||
|
@ -87,12 +87,12 @@ define amdgpu_kernel void @test_sink_noop_addrspacecast_flat_to_constant_i32(i32
|
||||
entry:
|
||||
%out.gep = getelementptr i32, i32* %out, i64 999999
|
||||
%in.gep = getelementptr i32, i32* %in, i64 7
|
||||
%cast = addrspacecast i32* %in.gep to i32 addrspace(2)*
|
||||
%cast = addrspacecast i32* %in.gep to i32 addrspace(4)*
|
||||
%tmp0 = icmp eq i32 %cond, 0
|
||||
br i1 %tmp0, label %endif, label %if
|
||||
|
||||
if:
|
||||
%tmp1 = load i32, i32 addrspace(2)* %cast
|
||||
%tmp1 = load i32, i32 addrspace(4)* %cast
|
||||
br label %endif
|
||||
|
||||
endif:
|
||||
|
@ -268,23 +268,23 @@ done:
|
||||
}
|
||||
|
||||
; OPT-LABEL: @test_sink_constant_small_offset_i32
|
||||
; OPT-NOT: getelementptr i32, i32 addrspace(2)*
|
||||
; OPT-NOT: getelementptr i32, i32 addrspace(4)*
|
||||
; OPT: br i1
|
||||
|
||||
; GCN-LABEL: {{^}}test_sink_constant_small_offset_i32:
|
||||
; GCN: s_and_saveexec_b64
|
||||
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x7{{$}}
|
||||
; GCN: s_or_b64 exec, exec
|
||||
define amdgpu_kernel void @test_sink_constant_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
|
||||
define amdgpu_kernel void @test_sink_constant_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) {
|
||||
entry:
|
||||
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
|
||||
%in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 7
|
||||
%in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7
|
||||
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
|
||||
%tmp0 = icmp eq i32 %tid, 0
|
||||
br i1 %tmp0, label %endif, label %if
|
||||
|
||||
if:
|
||||
%tmp1 = load i32, i32 addrspace(2)* %in.gep
|
||||
%tmp1 = load i32, i32 addrspace(4)* %in.gep
|
||||
br label %endif
|
||||
|
||||
endif:
|
||||
@ -297,23 +297,23 @@ done:
|
||||
}
|
||||
|
||||
; OPT-LABEL: @test_sink_constant_max_8_bit_offset_i32
|
||||
; OPT-NOT: getelementptr i32, i32 addrspace(2)*
|
||||
; OPT-NOT: getelementptr i32, i32 addrspace(4)*
|
||||
; OPT: br i1
|
||||
|
||||
; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_i32:
|
||||
; GCN: s_and_saveexec_b64
|
||||
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xff{{$}}
|
||||
; GCN: s_or_b64 exec, exec
|
||||
define amdgpu_kernel void @test_sink_constant_max_8_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
|
||||
define amdgpu_kernel void @test_sink_constant_max_8_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) {
|
||||
entry:
|
||||
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
|
||||
%in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 255
|
||||
%in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 255
|
||||
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
|
||||
%tmp0 = icmp eq i32 %tid, 0
|
||||
br i1 %tmp0, label %endif, label %if
|
||||
|
||||
if:
|
||||
%tmp1 = load i32, i32 addrspace(2)* %in.gep
|
||||
%tmp1 = load i32, i32 addrspace(4)* %in.gep
|
||||
br label %endif
|
||||
|
||||
endif:
|
||||
@ -326,9 +326,9 @@ done:
|
||||
}
|
||||
|
||||
; OPT-LABEL: @test_sink_constant_max_8_bit_offset_p1_i32
|
||||
; OPT-SI: getelementptr i32, i32 addrspace(2)*
|
||||
; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
|
||||
; OPT-VI-NOT: getelementptr i32, i32 addrspace(2)*
|
||||
; OPT-SI: getelementptr i32, i32 addrspace(4)*
|
||||
; OPT-CI-NOT: getelementptr i32, i32 addrspace(4)*
|
||||
; OPT-VI-NOT: getelementptr i32, i32 addrspace(4)*
|
||||
; OPT: br i1
|
||||
|
||||
; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_p1_i32:
|
||||
@ -337,16 +337,16 @@ done:
|
||||
|
||||
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
|
||||
; GCN: s_or_b64 exec, exec
|
||||
define amdgpu_kernel void @test_sink_constant_max_8_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
|
||||
define amdgpu_kernel void @test_sink_constant_max_8_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) {
|
||||
entry:
|
||||
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
|
||||
%in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 256
|
||||
%in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 256
|
||||
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
|
||||
%tmp0 = icmp eq i32 %tid, 0
|
||||
br i1 %tmp0, label %endif, label %if
|
||||
|
||||
if:
|
||||
%tmp1 = load i32, i32 addrspace(2)* %in.gep
|
||||
%tmp1 = load i32, i32 addrspace(4)* %in.gep
|
||||
br label %endif
|
||||
|
||||
endif:
|
||||
@ -359,8 +359,8 @@ done:
|
||||
}
|
||||
|
||||
; OPT-LABEL: @test_sink_constant_max_32_bit_offset_i32
|
||||
; OPT-SI: getelementptr i32, i32 addrspace(2)*
|
||||
; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
|
||||
; OPT-SI: getelementptr i32, i32 addrspace(4)*
|
||||
; OPT-CI-NOT: getelementptr i32, i32 addrspace(4)*
|
||||
; OPT: br i1
|
||||
|
||||
; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_i32:
|
||||
@ -369,16 +369,16 @@ done:
|
||||
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}}
|
||||
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
|
||||
; GCN: s_or_b64 exec, exec
|
||||
define amdgpu_kernel void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
|
||||
define amdgpu_kernel void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) {
|
||||
entry:
|
||||
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
|
||||
%in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 4294967295
|
||||
%in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 4294967295
|
||||
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
|
||||
%tmp0 = icmp eq i32 %tid, 0
|
||||
br i1 %tmp0, label %endif, label %if
|
||||
|
||||
if:
|
||||
%tmp1 = load i32, i32 addrspace(2)* %in.gep
|
||||
%tmp1 = load i32, i32 addrspace(4)* %in.gep
|
||||
br label %endif
|
||||
|
||||
endif:
|
||||
@ -391,7 +391,7 @@ done:
|
||||
}
|
||||
|
||||
; OPT-LABEL: @test_sink_constant_max_32_bit_offset_p1_i32
|
||||
; OPT: getelementptr i32, i32 addrspace(2)*
|
||||
; OPT: getelementptr i32, i32 addrspace(4)*
|
||||
; OPT: br i1
|
||||
|
||||
; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_p1_i32:
|
||||
@ -400,16 +400,16 @@ done:
|
||||
; GCN: s_addc_u32
|
||||
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
|
||||
; GCN: s_or_b64 exec, exec
|
||||
define amdgpu_kernel void @test_sink_constant_max_32_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
|
||||
define amdgpu_kernel void @test_sink_constant_max_32_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) {
|
||||
entry:
|
||||
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
|
||||
%in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 17179869181
|
||||
%in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 17179869181
|
||||
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
|
||||
%tmp0 = icmp eq i32 %tid, 0
|
||||
br i1 %tmp0, label %endif, label %if
|
||||
|
||||
if:
|
||||
%tmp1 = load i32, i32 addrspace(2)* %in.gep
|
||||
%tmp1 = load i32, i32 addrspace(4)* %in.gep
|
||||
br label %endif
|
||||
|
||||
endif:
|
||||
@ -430,16 +430,16 @@ done:
|
||||
; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xffffc{{$}}
|
||||
|
||||
; GCN: s_or_b64 exec, exec
|
||||
define amdgpu_kernel void @test_sink_constant_max_20_bit_byte_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
|
||||
define amdgpu_kernel void @test_sink_constant_max_20_bit_byte_offset_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) {
|
||||
entry:
|
||||
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
|
||||
%in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262143
|
||||
%in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 262143
|
||||
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
|
||||
%tmp0 = icmp eq i32 %tid, 0
|
||||
br i1 %tmp0, label %endif, label %if
|
||||
|
||||
if:
|
||||
%tmp1 = load i32, i32 addrspace(2)* %in.gep
|
||||
%tmp1 = load i32, i32 addrspace(4)* %in.gep
|
||||
br label %endif
|
||||
|
||||
endif:
|
||||
@ -452,9 +452,9 @@ done:
|
||||
}
|
||||
|
||||
; OPT-LABEL: @test_sink_constant_max_20_bit_byte_offset_p1_i32
|
||||
; OPT-SI: getelementptr i32, i32 addrspace(2)*
|
||||
; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
|
||||
; OPT-VI: getelementptr i32, i32 addrspace(2)*
|
||||
; OPT-SI: getelementptr i32, i32 addrspace(4)*
|
||||
; OPT-CI-NOT: getelementptr i32, i32 addrspace(4)*
|
||||
; OPT-VI: getelementptr i32, i32 addrspace(4)*
|
||||
; OPT: br i1
|
||||
|
||||
; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_p1_i32:
|
||||
@ -468,16 +468,16 @@ done:
|
||||
; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
|
||||
|
||||
; GCN: s_or_b64 exec, exec
|
||||
define amdgpu_kernel void @test_sink_constant_max_20_bit_byte_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
|
||||
define amdgpu_kernel void @test_sink_constant_max_20_bit_byte_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) {
|
||||
entry:
|
||||
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
|
||||
%in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262144
|
||||
%in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 262144
|
||||
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
|
||||
%tmp0 = icmp eq i32 %tid, 0
|
||||
br i1 %tmp0, label %endif, label %if
|
||||
|
||||
if:
|
||||
%tmp1 = load i32, i32 addrspace(2)* %in.gep
|
||||
%tmp1 = load i32, i32 addrspace(4)* %in.gep
|
||||
br label %endif
|
||||
|
||||
endif:
|
||||
@ -524,17 +524,17 @@ bb34:
|
||||
; OPT: br i1 %tmp0,
|
||||
; OPT: if:
|
||||
; OPT: getelementptr i8, {{.*}} 4095
|
||||
define amdgpu_kernel void @test_sink_constant_small_max_mubuf_offset_load_i32_align_1(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
|
||||
define amdgpu_kernel void @test_sink_constant_small_max_mubuf_offset_load_i32_align_1(i32 addrspace(1)* %out, i8 addrspace(4)* %in) {
|
||||
entry:
|
||||
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024
|
||||
%in.gep = getelementptr i8, i8 addrspace(2)* %in, i64 4095
|
||||
%in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 4095
|
||||
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
|
||||
%tmp0 = icmp eq i32 %tid, 0
|
||||
br i1 %tmp0, label %endif, label %if
|
||||
|
||||
if:
|
||||
%bitcast = bitcast i8 addrspace(2)* %in.gep to i32 addrspace(2)*
|
||||
%tmp1 = load i32, i32 addrspace(2)* %bitcast, align 1
|
||||
%bitcast = bitcast i8 addrspace(4)* %in.gep to i32 addrspace(4)*
|
||||
%tmp1 = load i32, i32 addrspace(4)* %bitcast, align 1
|
||||
br label %endif
|
||||
|
||||
endif:
|
||||
|
@ -32,9 +32,9 @@ endif:
|
||||
; GCN: v_add_f64
|
||||
; GCN: v_cndmask_b32_e32
|
||||
; GCN: v_cndmask_b32_e32
|
||||
define amdgpu_kernel void @test_vccnz_sgpr_ifcvt_triangle64(double addrspace(1)* %out, double addrspace(2)* %in) #0 {
|
||||
define amdgpu_kernel void @test_vccnz_sgpr_ifcvt_triangle64(double addrspace(1)* %out, double addrspace(4)* %in) #0 {
|
||||
entry:
|
||||
%v = load double, double addrspace(2)* %in
|
||||
%v = load double, double addrspace(4)* %in
|
||||
%cc = fcmp oeq double %v, 1.000000e+00
|
||||
br i1 %cc, label %if, label %endif
|
||||
|
||||
|
@ -187,9 +187,9 @@ endif:
|
||||
|
||||
; GCN: [[ENDIF]]:
|
||||
; GCN: buffer_store_dword
|
||||
define amdgpu_kernel void @test_vccnz_sgpr_ifcvt_triangle(i32 addrspace(1)* %out, i32 addrspace(2)* %in, float %cnd) #0 {
|
||||
define amdgpu_kernel void @test_vccnz_sgpr_ifcvt_triangle(i32 addrspace(1)* %out, i32 addrspace(4)* %in, float %cnd) #0 {
|
||||
entry:
|
||||
%v = load i32, i32 addrspace(2)* %in
|
||||
%v = load i32, i32 addrspace(4)* %in
|
||||
%cc = fcmp oeq float %cnd, 1.000000e+00
|
||||
br i1 %cc, label %if, label %endif
|
||||
|
||||
@ -206,9 +206,9 @@ endif:
|
||||
|
||||
; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle_constant_load:
|
||||
; GCN: v_cndmask_b32
|
||||
define amdgpu_kernel void @test_vccnz_ifcvt_triangle_constant_load(float addrspace(1)* %out, float addrspace(2)* %in) #0 {
|
||||
define amdgpu_kernel void @test_vccnz_ifcvt_triangle_constant_load(float addrspace(1)* %out, float addrspace(4)* %in) #0 {
|
||||
entry:
|
||||
%v = load float, float addrspace(2)* %in
|
||||
%v = load float, float addrspace(4)* %in
|
||||
%cc = fcmp oeq float %v, 1.000000e+00
|
||||
br i1 %cc, label %if, label %endif
|
||||
|
||||
@ -248,9 +248,9 @@ endif:
|
||||
; GCN: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], [[VAL]]
|
||||
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 1
|
||||
; GCN-NEXT: s_cselect_b32 [[SELECT:s[0-9]+]], [[ADD]], [[VAL]]
|
||||
define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle(i32 addrspace(2)* %in, i32 %cond) #0 {
|
||||
define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle(i32 addrspace(4)* %in, i32 %cond) #0 {
|
||||
entry:
|
||||
%v = load i32, i32 addrspace(2)* %in
|
||||
%v = load i32, i32 addrspace(4)* %in
|
||||
%cc = icmp eq i32 %cond, 1
|
||||
br i1 %cc, label %if, label %endif
|
||||
|
||||
@ -295,9 +295,9 @@ endif:
|
||||
; GCN: s_addc_u32
|
||||
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 1
|
||||
; GCN-NEXT: s_cselect_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}
|
||||
define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle64(i64 addrspace(2)* %in, i32 %cond) #0 {
|
||||
define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle64(i64 addrspace(4)* %in, i32 %cond) #0 {
|
||||
entry:
|
||||
%v = load i64, i64 addrspace(2)* %in
|
||||
%v = load i64, i64 addrspace(4)* %in
|
||||
%cc = icmp eq i32 %cond, 1
|
||||
br i1 %cc, label %if, label %endif
|
||||
|
||||
@ -320,9 +320,9 @@ endif:
|
||||
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 1
|
||||
; GCN-NEXT: s_cselect_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}
|
||||
; GCN-NEXT: s_cselect_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}
|
||||
define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle96(<3 x i32> addrspace(2)* %in, i32 %cond) #0 {
|
||||
define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle96(<3 x i32> addrspace(4)* %in, i32 %cond) #0 {
|
||||
entry:
|
||||
%v = load <3 x i32>, <3 x i32> addrspace(2)* %in
|
||||
%v = load <3 x i32>, <3 x i32> addrspace(4)* %in
|
||||
%cc = icmp eq i32 %cond, 1
|
||||
br i1 %cc, label %if, label %endif
|
||||
|
||||
@ -345,9 +345,9 @@ endif:
|
||||
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 1
|
||||
; GCN-NEXT: s_cselect_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}
|
||||
; GCN-NEXT: s_cselect_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}
|
||||
define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle128(<4 x i32> addrspace(2)* %in, i32 %cond) #0 {
|
||||
define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle128(<4 x i32> addrspace(4)* %in, i32 %cond) #0 {
|
||||
entry:
|
||||
%v = load <4 x i32>, <4 x i32> addrspace(2)* %in
|
||||
%v = load <4 x i32>, <4 x i32> addrspace(4)* %in
|
||||
%cc = icmp eq i32 %cond, 1
|
||||
br i1 %cc, label %if, label %endif
|
||||
|
||||
|
@ -8,8 +8,8 @@
|
||||
; GCN-DAG: v_mov_b32_e32 [[VELT1:v[0-9]+]], [[ELT1]]
|
||||
; GCN-DAG: buffer_store_short [[VELT0]]
|
||||
; GCN-DAG: buffer_store_short [[VELT1]]
|
||||
define amdgpu_kernel void @extract_vector_elt_v2f16(half addrspace(1)* %out, <2 x half> addrspace(2)* %vec.ptr) #0 {
|
||||
%vec = load <2 x half>, <2 x half> addrspace(2)* %vec.ptr
|
||||
define amdgpu_kernel void @extract_vector_elt_v2f16(half addrspace(1)* %out, <2 x half> addrspace(4)* %vec.ptr) #0 {
|
||||
%vec = load <2 x half>, <2 x half> addrspace(4)* %vec.ptr
|
||||
%p0 = extractelement <2 x half> %vec, i32 0
|
||||
%p1 = extractelement <2 x half> %vec, i32 1
|
||||
%out1 = getelementptr half, half addrspace(1)* %out, i32 10
|
||||
@ -26,8 +26,8 @@ define amdgpu_kernel void @extract_vector_elt_v2f16(half addrspace(1)* %out, <2
|
||||
; GCN: v_mov_b32_e32 [[VELT1:v[0-9]+]], [[ELT1]]
|
||||
; GCN: buffer_store_short [[VELT1]]
|
||||
; GCN: ScratchSize: 0
|
||||
define amdgpu_kernel void @extract_vector_elt_v2f16_dynamic_sgpr(half addrspace(1)* %out, <2 x half> addrspace(2)* %vec.ptr, i32 %idx) #0 {
|
||||
%vec = load <2 x half>, <2 x half> addrspace(2)* %vec.ptr
|
||||
define amdgpu_kernel void @extract_vector_elt_v2f16_dynamic_sgpr(half addrspace(1)* %out, <2 x half> addrspace(4)* %vec.ptr, i32 %idx) #0 {
|
||||
%vec = load <2 x half>, <2 x half> addrspace(4)* %vec.ptr
|
||||
%elt = extractelement <2 x half> %vec, i32 %idx
|
||||
store half %elt, half addrspace(1)* %out, align 2
|
||||
ret void
|
||||
@ -45,12 +45,12 @@ define amdgpu_kernel void @extract_vector_elt_v2f16_dynamic_sgpr(half addrspace(
|
||||
; SI: buffer_store_short [[ELT]]
|
||||
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[ELT]]
|
||||
; GCN: ScratchSize: 0{{$}}
|
||||
define amdgpu_kernel void @extract_vector_elt_v2f16_dynamic_vgpr(half addrspace(1)* %out, <2 x half> addrspace(2)* %vec.ptr, i32 addrspace(1)* %idx.ptr) #0 {
|
||||
define amdgpu_kernel void @extract_vector_elt_v2f16_dynamic_vgpr(half addrspace(1)* %out, <2 x half> addrspace(4)* %vec.ptr, i32 addrspace(1)* %idx.ptr) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
%gep = getelementptr inbounds i32, i32 addrspace(1)* %idx.ptr, i64 %tid.ext
|
||||
%out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
|
||||
%vec = load <2 x half>, <2 x half> addrspace(2)* %vec.ptr
|
||||
%vec = load <2 x half>, <2 x half> addrspace(4)* %vec.ptr
|
||||
%idx = load i32, i32 addrspace(1)* %gep
|
||||
%elt = extractelement <2 x half> %vec, i32 %idx
|
||||
store half %elt, half addrspace(1)* %out.gep, align 2
|
||||
|
@ -9,8 +9,8 @@
|
||||
; GCN-DAG: v_mov_b32_e32 [[VELT1:v[0-9]+]], [[ELT1]]
|
||||
; GCN-DAG: buffer_store_short [[VELT0]]
|
||||
; GCN-DAG: buffer_store_short [[VELT1]]
|
||||
define amdgpu_kernel void @extract_vector_elt_v2i16(i16 addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr) #0 {
|
||||
%vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
|
||||
define amdgpu_kernel void @extract_vector_elt_v2i16(i16 addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr) #0 {
|
||||
%vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
|
||||
%p0 = extractelement <2 x i16> %vec, i32 0
|
||||
%p1 = extractelement <2 x i16> %vec, i32 1
|
||||
%out1 = getelementptr i16, i16 addrspace(1)* %out, i32 10
|
||||
@ -27,8 +27,8 @@ define amdgpu_kernel void @extract_vector_elt_v2i16(i16 addrspace(1)* %out, <2 x
|
||||
; GCN: v_mov_b32_e32 [[VELT1:v[0-9]+]], [[ELT1]]
|
||||
; GCN: buffer_store_short [[VELT1]]
|
||||
; GCN: ScratchSize: 0
|
||||
define amdgpu_kernel void @extract_vector_elt_v2i16_dynamic_sgpr(i16 addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr, i32 %idx) #0 {
|
||||
%vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
|
||||
define amdgpu_kernel void @extract_vector_elt_v2i16_dynamic_sgpr(i16 addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr, i32 %idx) #0 {
|
||||
%vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
|
||||
%elt = extractelement <2 x i16> %vec, i32 %idx
|
||||
store i16 %elt, i16 addrspace(1)* %out, align 2
|
||||
ret void
|
||||
@ -45,13 +45,13 @@ define amdgpu_kernel void @extract_vector_elt_v2i16_dynamic_sgpr(i16 addrspace(1
|
||||
; SI: buffer_store_short [[ELT]]
|
||||
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[ELT]]
|
||||
; GCN: ScratchSize: 0{{$}}
|
||||
define amdgpu_kernel void @extract_vector_elt_v2i16_dynamic_vgpr(i16 addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr, i32 addrspace(1)* %idx.ptr) #0 {
|
||||
define amdgpu_kernel void @extract_vector_elt_v2i16_dynamic_vgpr(i16 addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr, i32 addrspace(1)* %idx.ptr) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
%gep = getelementptr inbounds i32, i32 addrspace(1)* %idx.ptr, i64 %tid.ext
|
||||
%out.gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 %tid.ext
|
||||
%idx = load volatile i32, i32 addrspace(1)* %gep
|
||||
%vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
|
||||
%vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
|
||||
%elt = extractelement <2 x i16> %vec, i32 %idx
|
||||
store i16 %elt, i16 addrspace(1)* %out.gep, align 2
|
||||
ret void
|
||||
|
@ -1,8 +1,8 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx803 -enable-si-insert-waitcnts=1 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
|
||||
; RUN: llvm-as -data-layout=A5 < %s | llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx803 -enable-si-insert-waitcnts=1 -verify-machineinstrs | FileCheck --check-prefix=GCN %s
|
||||
|
||||
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
|
||||
declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
|
||||
declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
|
||||
declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
declare i32 @llvm.amdgcn.workitem.id.x()
|
||||
declare i32 @llvm.amdgcn.workgroup.id.x()
|
||||
declare void @llvm.amdgcn.s.barrier()
|
||||
@ -34,19 +34,19 @@ define amdgpu_kernel void @test_local(i32 addrspace(1)*) {
|
||||
fence syncscope("workgroup") acquire
|
||||
%8 = load i32, i32 addrspace(3)* getelementptr inbounds ([1 x i32], [1 x i32] addrspace(3)* @test_local.temp, i64 0, i64 0), align 4
|
||||
%9 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %2, align 4
|
||||
%10 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
|
||||
%10 = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
|
||||
%11 = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%12 = call i32 @llvm.amdgcn.workgroup.id.x()
|
||||
%13 = getelementptr inbounds i8, i8 addrspace(2)* %10, i64 4
|
||||
%14 = bitcast i8 addrspace(2)* %13 to i16 addrspace(2)*
|
||||
%15 = load i16, i16 addrspace(2)* %14, align 4
|
||||
%13 = getelementptr inbounds i8, i8 addrspace(4)* %10, i64 4
|
||||
%14 = bitcast i8 addrspace(4)* %13 to i16 addrspace(4)*
|
||||
%15 = load i16, i16 addrspace(4)* %14, align 4
|
||||
%16 = zext i16 %15 to i32
|
||||
%17 = mul i32 %12, %16
|
||||
%18 = add i32 %17, %11
|
||||
%19 = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%19 = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%20 = zext i32 %18 to i64
|
||||
%21 = bitcast i8 addrspace(2)* %19 to i64 addrspace(2)*
|
||||
%22 = load i64, i64 addrspace(2)* %21, align 8
|
||||
%21 = bitcast i8 addrspace(4)* %19 to i64 addrspace(4)*
|
||||
%22 = load i64, i64 addrspace(4)* %21, align 8
|
||||
%23 = add i64 %22, %20
|
||||
%24 = getelementptr inbounds i32, i32 addrspace(1)* %9, i64 %23
|
||||
store i32 %8, i32 addrspace(1)* %24, align 4
|
||||
@ -68,56 +68,56 @@ define amdgpu_kernel void @test_global(i32 addrspace(1)*) {
|
||||
; <label>:4: ; preds = %58, %1
|
||||
%5 = load i32, i32 addrspace(5)* %3, align 4
|
||||
%6 = sext i32 %5 to i64
|
||||
%7 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
|
||||
%7 = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
|
||||
%8 = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%9 = call i32 @llvm.amdgcn.workgroup.id.x()
|
||||
%10 = getelementptr inbounds i8, i8 addrspace(2)* %7, i64 4
|
||||
%11 = bitcast i8 addrspace(2)* %10 to i16 addrspace(2)*
|
||||
%12 = load i16, i16 addrspace(2)* %11, align 4
|
||||
%10 = getelementptr inbounds i8, i8 addrspace(4)* %7, i64 4
|
||||
%11 = bitcast i8 addrspace(4)* %10 to i16 addrspace(4)*
|
||||
%12 = load i16, i16 addrspace(4)* %11, align 4
|
||||
%13 = zext i16 %12 to i32
|
||||
%14 = mul i32 %9, %13
|
||||
%15 = add i32 %14, %8
|
||||
%16 = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%16 = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%17 = zext i32 %15 to i64
|
||||
%18 = bitcast i8 addrspace(2)* %16 to i64 addrspace(2)*
|
||||
%19 = load i64, i64 addrspace(2)* %18, align 8
|
||||
%18 = bitcast i8 addrspace(4)* %16 to i64 addrspace(4)*
|
||||
%19 = load i64, i64 addrspace(4)* %18, align 8
|
||||
%20 = add i64 %19, %17
|
||||
%21 = icmp ult i64 %6, %20
|
||||
br i1 %21, label %22, label %61
|
||||
|
||||
; <label>:22: ; preds = %4
|
||||
%23 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
|
||||
%23 = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
|
||||
%24 = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%25 = call i32 @llvm.amdgcn.workgroup.id.x()
|
||||
%26 = getelementptr inbounds i8, i8 addrspace(2)* %23, i64 4
|
||||
%27 = bitcast i8 addrspace(2)* %26 to i16 addrspace(2)*
|
||||
%28 = load i16, i16 addrspace(2)* %27, align 4
|
||||
%26 = getelementptr inbounds i8, i8 addrspace(4)* %23, i64 4
|
||||
%27 = bitcast i8 addrspace(4)* %26 to i16 addrspace(4)*
|
||||
%28 = load i16, i16 addrspace(4)* %27, align 4
|
||||
%29 = zext i16 %28 to i32
|
||||
%30 = mul i32 %25, %29
|
||||
%31 = add i32 %30, %24
|
||||
%32 = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%32 = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%33 = zext i32 %31 to i64
|
||||
%34 = bitcast i8 addrspace(2)* %32 to i64 addrspace(2)*
|
||||
%35 = load i64, i64 addrspace(2)* %34, align 8
|
||||
%34 = bitcast i8 addrspace(4)* %32 to i64 addrspace(4)*
|
||||
%35 = load i64, i64 addrspace(4)* %34, align 8
|
||||
%36 = add i64 %35, %33
|
||||
%37 = add i64 %36, 2184
|
||||
%38 = trunc i64 %37 to i32
|
||||
%39 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %2, align 4
|
||||
%40 = load i32, i32 addrspace(5)* %3, align 4
|
||||
%41 = sext i32 %40 to i64
|
||||
%42 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
|
||||
%42 = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
|
||||
%43 = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%44 = call i32 @llvm.amdgcn.workgroup.id.x()
|
||||
%45 = getelementptr inbounds i8, i8 addrspace(2)* %42, i64 4
|
||||
%46 = bitcast i8 addrspace(2)* %45 to i16 addrspace(2)*
|
||||
%47 = load i16, i16 addrspace(2)* %46, align 4
|
||||
%45 = getelementptr inbounds i8, i8 addrspace(4)* %42, i64 4
|
||||
%46 = bitcast i8 addrspace(4)* %45 to i16 addrspace(4)*
|
||||
%47 = load i16, i16 addrspace(4)* %46, align 4
|
||||
%48 = zext i16 %47 to i32
|
||||
%49 = mul i32 %44, %48
|
||||
%50 = add i32 %49, %43
|
||||
%51 = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%51 = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%52 = zext i32 %50 to i64
|
||||
%53 = bitcast i8 addrspace(2)* %51 to i64 addrspace(2)*
|
||||
%54 = load i64, i64 addrspace(2)* %53, align 8
|
||||
%53 = bitcast i8 addrspace(4)* %51 to i64 addrspace(4)*
|
||||
%54 = load i64, i64 addrspace(4)* %53, align 8
|
||||
%55 = add i64 %54, %52
|
||||
%56 = add i64 %41, %55
|
||||
%57 = getelementptr inbounds i32, i32 addrspace(1)* %39, i64 %56
|
||||
@ -147,19 +147,19 @@ define amdgpu_kernel void @test_global_local(i32 addrspace(1)*) {
|
||||
%2 = alloca i32 addrspace(1)*, align 4, addrspace(5)
|
||||
store i32 addrspace(1)* %0, i32 addrspace(1)* addrspace(5)* %2, align 4
|
||||
%3 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %2, align 4
|
||||
%4 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
|
||||
%4 = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
|
||||
%5 = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%6 = call i32 @llvm.amdgcn.workgroup.id.x()
|
||||
%7 = getelementptr inbounds i8, i8 addrspace(2)* %4, i64 4
|
||||
%8 = bitcast i8 addrspace(2)* %7 to i16 addrspace(2)*
|
||||
%9 = load i16, i16 addrspace(2)* %8, align 4
|
||||
%7 = getelementptr inbounds i8, i8 addrspace(4)* %4, i64 4
|
||||
%8 = bitcast i8 addrspace(4)* %7 to i16 addrspace(4)*
|
||||
%9 = load i16, i16 addrspace(4)* %8, align 4
|
||||
%10 = zext i16 %9 to i32
|
||||
%11 = mul i32 %6, %10
|
||||
%12 = add i32 %11, %5
|
||||
%13 = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%13 = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%14 = zext i32 %12 to i64
|
||||
%15 = bitcast i8 addrspace(2)* %13 to i64 addrspace(2)*
|
||||
%16 = load i64, i64 addrspace(2)* %15, align 8
|
||||
%15 = bitcast i8 addrspace(4)* %13 to i64 addrspace(4)*
|
||||
%16 = load i64, i64 addrspace(4)* %15, align 8
|
||||
%17 = add i64 %16, %14
|
||||
%18 = getelementptr inbounds i32, i32 addrspace(1)* %3, i64 %17
|
||||
store i32 1, i32 addrspace(1)* %18, align 4
|
||||
@ -178,19 +178,19 @@ define amdgpu_kernel void @test_global_local(i32 addrspace(1)*) {
|
||||
fence syncscope("workgroup") acquire
|
||||
%24 = load i32, i32 addrspace(3)* getelementptr inbounds ([1 x i32], [1 x i32] addrspace(3)* @test_global_local.temp, i64 0, i64 0), align 4
|
||||
%25 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %2, align 4
|
||||
%26 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
|
||||
%26 = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
|
||||
%27 = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%28 = call i32 @llvm.amdgcn.workgroup.id.x()
|
||||
%29 = getelementptr inbounds i8, i8 addrspace(2)* %26, i64 4
|
||||
%30 = bitcast i8 addrspace(2)* %29 to i16 addrspace(2)*
|
||||
%31 = load i16, i16 addrspace(2)* %30, align 4
|
||||
%29 = getelementptr inbounds i8, i8 addrspace(4)* %26, i64 4
|
||||
%30 = bitcast i8 addrspace(4)* %29 to i16 addrspace(4)*
|
||||
%31 = load i16, i16 addrspace(4)* %30, align 4
|
||||
%32 = zext i16 %31 to i32
|
||||
%33 = mul i32 %28, %32
|
||||
%34 = add i32 %33, %27
|
||||
%35 = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%35 = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%36 = zext i32 %34 to i64
|
||||
%37 = bitcast i8 addrspace(2)* %35 to i64 addrspace(2)*
|
||||
%38 = load i64, i64 addrspace(2)* %37, align 8
|
||||
%37 = bitcast i8 addrspace(4)* %35 to i64 addrspace(4)*
|
||||
%38 = load i64, i64 addrspace(4)* %37, align 8
|
||||
%39 = add i64 %38, %36
|
||||
%40 = getelementptr inbounds i32, i32 addrspace(1)* %25, i64 %39
|
||||
store i32 %24, i32 addrspace(1)* %40, align 4
|
||||
|
@ -164,7 +164,7 @@ define <5 x i32> @v5i32_func_void() #0 {
|
||||
; GCN: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define <8 x i32> @v8i32_func_void() #0 {
|
||||
%ptr = load volatile <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(2)* undef
|
||||
%ptr = load volatile <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef
|
||||
%val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr
|
||||
ret <8 x i32> %val
|
||||
}
|
||||
@ -177,7 +177,7 @@ define <8 x i32> @v8i32_func_void() #0 {
|
||||
; GCN: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define <16 x i32> @v16i32_func_void() #0 {
|
||||
%ptr = load volatile <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(2)* undef
|
||||
%ptr = load volatile <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef
|
||||
%val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr
|
||||
ret <16 x i32> %val
|
||||
}
|
||||
@ -194,7 +194,7 @@ define <16 x i32> @v16i32_func_void() #0 {
|
||||
; GCN: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define <32 x i32> @v32i32_func_void() #0 {
|
||||
%ptr = load volatile <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(2)* undef
|
||||
%ptr = load volatile <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
|
||||
%val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr
|
||||
ret <32 x i32> %val
|
||||
}
|
||||
@ -214,7 +214,7 @@ define <2 x i64> @v2i64_func_void() #0 {
|
||||
; GCN: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define <3 x i64> @v3i64_func_void() #0 {
|
||||
%ptr = load volatile <3 x i64> addrspace(1)*, <3 x i64> addrspace(1)* addrspace(2)* undef
|
||||
%ptr = load volatile <3 x i64> addrspace(1)*, <3 x i64> addrspace(1)* addrspace(4)* undef
|
||||
%val = load <3 x i64>, <3 x i64> addrspace(1)* %ptr
|
||||
ret <3 x i64> %val
|
||||
}
|
||||
@ -225,7 +225,7 @@ define <3 x i64> @v3i64_func_void() #0 {
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define <4 x i64> @v4i64_func_void() #0 {
|
||||
%ptr = load volatile <4 x i64> addrspace(1)*, <4 x i64> addrspace(1)* addrspace(2)* undef
|
||||
%ptr = load volatile <4 x i64> addrspace(1)*, <4 x i64> addrspace(1)* addrspace(4)* undef
|
||||
%val = load <4 x i64>, <4 x i64> addrspace(1)* %ptr
|
||||
ret <4 x i64> %val
|
||||
}
|
||||
@ -237,7 +237,7 @@ define <4 x i64> @v4i64_func_void() #0 {
|
||||
; GCN: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define <5 x i64> @v5i64_func_void() #0 {
|
||||
%ptr = load volatile <5 x i64> addrspace(1)*, <5 x i64> addrspace(1)* addrspace(2)* undef
|
||||
%ptr = load volatile <5 x i64> addrspace(1)*, <5 x i64> addrspace(1)* addrspace(4)* undef
|
||||
%val = load <5 x i64>, <5 x i64> addrspace(1)* %ptr
|
||||
ret <5 x i64> %val
|
||||
}
|
||||
@ -250,7 +250,7 @@ define <5 x i64> @v5i64_func_void() #0 {
|
||||
; GCN: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define <8 x i64> @v8i64_func_void() #0 {
|
||||
%ptr = load volatile <8 x i64> addrspace(1)*, <8 x i64> addrspace(1)* addrspace(2)* undef
|
||||
%ptr = load volatile <8 x i64> addrspace(1)*, <8 x i64> addrspace(1)* addrspace(4)* undef
|
||||
%val = load <8 x i64>, <8 x i64> addrspace(1)* %ptr
|
||||
ret <8 x i64> %val
|
||||
}
|
||||
@ -267,7 +267,7 @@ define <8 x i64> @v8i64_func_void() #0 {
|
||||
; GCN: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define <16 x i64> @v16i64_func_void() #0 {
|
||||
%ptr = load volatile <16 x i64> addrspace(1)*, <16 x i64> addrspace(1)* addrspace(2)* undef
|
||||
%ptr = load volatile <16 x i64> addrspace(1)*, <16 x i64> addrspace(1)* addrspace(4)* undef
|
||||
%val = load <16 x i64>, <16 x i64> addrspace(1)* %ptr
|
||||
ret <16 x i64> %val
|
||||
}
|
||||
@ -309,7 +309,7 @@ define <4 x i16> @v4i16_func_void() #0 {
|
||||
; GFX9: v_lshrrev_b32_e32 v1, 16, v0
|
||||
; GCN: s_setpc_b64
|
||||
define <5 x i16> @v5i16_func_void() #0 {
|
||||
%ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(2)* undef
|
||||
%ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(4)* undef
|
||||
%val = load <5 x i16>, <5 x i16> addrspace(1)* %ptr
|
||||
ret <5 x i16> %val
|
||||
}
|
||||
@ -319,7 +319,7 @@ define <5 x i16> @v5i16_func_void() #0 {
|
||||
; GFX9: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
define <8 x i16> @v8i16_func_void() #0 {
|
||||
%ptr = load volatile <8 x i16> addrspace(1)*, <8 x i16> addrspace(1)* addrspace(2)* undef
|
||||
%ptr = load volatile <8 x i16> addrspace(1)*, <8 x i16> addrspace(1)* addrspace(4)* undef
|
||||
%val = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
|
||||
ret <8 x i16> %val
|
||||
}
|
||||
@ -330,7 +330,7 @@ define <8 x i16> @v8i16_func_void() #0 {
|
||||
; GFX9: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
define <16 x i16> @v16i16_func_void() #0 {
|
||||
%ptr = load volatile <16 x i16> addrspace(1)*, <16 x i16> addrspace(1)* addrspace(2)* undef
|
||||
%ptr = load volatile <16 x i16> addrspace(1)*, <16 x i16> addrspace(1)* addrspace(4)* undef
|
||||
%val = load <16 x i16>, <16 x i16> addrspace(1)* %ptr
|
||||
ret <16 x i16> %val
|
||||
}
|
||||
@ -342,7 +342,7 @@ define <16 x i16> @v16i16_func_void() #0 {
|
||||
; GCN-DAG: v14
|
||||
; GCN-DAG: v15
|
||||
define <16 x i8> @v16i8_func_void() #0 {
|
||||
%ptr = load volatile <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(2)* undef
|
||||
%ptr = load volatile <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef
|
||||
%val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
|
||||
ret <16 x i8> %val
|
||||
}
|
||||
@ -356,7 +356,7 @@ define <16 x i8> @v16i8_func_void() #0 {
|
||||
; GFX89-DAG: v_lshrrev_b16_e32 v1, 8, v0
|
||||
; GCN: s_setpc_b64
|
||||
define <4 x i8> @v4i8_func_void() #0 {
|
||||
%ptr = load volatile <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(2)* undef
|
||||
%ptr = load volatile <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(4)* undef
|
||||
%val = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
|
||||
ret <4 x i8> %val
|
||||
}
|
||||
@ -427,7 +427,7 @@ define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret %arg0)
|
||||
; GFX9: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
define <33 x i32> @v33i32_func_void() #0 {
|
||||
%ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(2)* undef
|
||||
%ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(4)* undef
|
||||
%val = load <33 x i32>, <33 x i32> addrspace(1)* %ptr
|
||||
ret <33 x i32> %val
|
||||
}
|
||||
@ -469,7 +469,7 @@ define <33 x i32> @v33i32_func_void() #0 {
|
||||
; GFX9: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 {
|
||||
%ptr = load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(2)* undef
|
||||
%ptr = load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef
|
||||
%val = load { <32 x i32>, i32 }, { <32 x i32>, i32 } addrspace(1)* %ptr
|
||||
ret { <32 x i32>, i32 }%val
|
||||
}
|
||||
@ -511,7 +511,7 @@ define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 {
|
||||
; GFX9: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 {
|
||||
%ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(2)* undef
|
||||
%ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(4)* undef
|
||||
%val = load { i32, <32 x i32> }, { i32, <32 x i32> } addrspace(1)* %ptr
|
||||
ret { i32, <32 x i32> }%val
|
||||
}
|
||||
|
@ -1,9 +1,9 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NOHSA %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA %s
|
||||
|
||||
@private1 = private unnamed_addr addrspace(2) constant [4 x float] [float 0.0, float 1.0, float 2.0, float 3.0]
|
||||
@private2 = private unnamed_addr addrspace(2) constant [4 x float] [float 4.0, float 5.0, float 6.0, float 7.0]
|
||||
@available_externally = available_externally addrspace(2) global [256 x i32] zeroinitializer
|
||||
@private1 = private unnamed_addr addrspace(4) constant [4 x float] [float 0.0, float 1.0, float 2.0, float 3.0]
|
||||
@private2 = private unnamed_addr addrspace(4) constant [4 x float] [float 4.0, float 5.0, float 6.0, float 7.0]
|
||||
@available_externally = available_externally addrspace(4) global [256 x i32] zeroinitializer
|
||||
|
||||
; GCN-LABEL: {{^}}private_test:
|
||||
; GCN: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}}
|
||||
@ -27,11 +27,11 @@
|
||||
; HSA: s_addc_u32 s{{[0-9]+}}, s[[PC1_HI]], private2@rel32@hi+4
|
||||
|
||||
define amdgpu_kernel void @private_test(i32 %index, float addrspace(1)* %out) {
|
||||
%ptr = getelementptr [4 x float], [4 x float] addrspace(2) * @private1, i32 0, i32 %index
|
||||
%val = load float, float addrspace(2)* %ptr
|
||||
%ptr = getelementptr [4 x float], [4 x float] addrspace(4) * @private1, i32 0, i32 %index
|
||||
%val = load float, float addrspace(4)* %ptr
|
||||
store volatile float %val, float addrspace(1)* %out
|
||||
%ptr2 = getelementptr [4 x float], [4 x float] addrspace(2) * @private2, i32 0, i32 %index
|
||||
%val2 = load float, float addrspace(2)* %ptr2
|
||||
%ptr2 = getelementptr [4 x float], [4 x float] addrspace(4) * @private2, i32 0, i32 %index
|
||||
%val2 = load float, float addrspace(4)* %ptr2
|
||||
store volatile float %val2, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -41,8 +41,8 @@ define amdgpu_kernel void @private_test(i32 %index, float addrspace(1)* %out) {
|
||||
; HSA: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], available_externally@gotpcrel32@lo+4
|
||||
; HSA: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], available_externally@gotpcrel32@hi+4
|
||||
define amdgpu_kernel void @available_externally_test(i32 addrspace(1)* %out) {
|
||||
%ptr = getelementptr [256 x i32], [256 x i32] addrspace(2)* @available_externally, i32 0, i32 1
|
||||
%val = load i32, i32 addrspace(2)* %ptr
|
||||
%ptr = getelementptr [256 x i32], [256 x i32] addrspace(4)* @available_externally, i32 0, i32 1
|
||||
%val = load i32, i32 addrspace(4)* %ptr
|
||||
store i32 %val, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
@ -4,9 +4,9 @@
|
||||
; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
|
||||
@b = internal addrspace(2) constant [1 x i16] [ i16 7 ], align 2
|
||||
@b = internal addrspace(4) constant [1 x i16] [ i16 7 ], align 2
|
||||
|
||||
@float_gv = internal unnamed_addr addrspace(2) constant [5 x float] [float 0.0, float 1.0, float 2.0, float 3.0, float 4.0], align 4
|
||||
@float_gv = internal unnamed_addr addrspace(4) constant [5 x float] [float 0.0, float 1.0, float 2.0, float 3.0, float 4.0], align 4
|
||||
|
||||
; FUNC-LABEL: {{^}}float:
|
||||
; GCN: s_load_dword
|
||||
@ -17,13 +17,13 @@
|
||||
; EG-NOT: MOV
|
||||
define amdgpu_kernel void @float(float addrspace(1)* %out, i32 %index) {
|
||||
entry:
|
||||
%0 = getelementptr inbounds [5 x float], [5 x float] addrspace(2)* @float_gv, i32 0, i32 %index
|
||||
%1 = load float, float addrspace(2)* %0
|
||||
%0 = getelementptr inbounds [5 x float], [5 x float] addrspace(4)* @float_gv, i32 0, i32 %index
|
||||
%1 = load float, float addrspace(4)* %0
|
||||
store float %1, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
@i32_gv = internal unnamed_addr addrspace(2) constant [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4], align 4
|
||||
@i32_gv = internal unnamed_addr addrspace(4) constant [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4], align 4
|
||||
|
||||
; FUNC-LABEL: {{^}}i32:
|
||||
|
||||
@ -35,8 +35,8 @@ entry:
|
||||
; EG-NOT: MOV
|
||||
define amdgpu_kernel void @i32(i32 addrspace(1)* %out, i32 %index) {
|
||||
entry:
|
||||
%0 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(2)* @i32_gv, i32 0, i32 %index
|
||||
%1 = load i32, i32 addrspace(2)* %0
|
||||
%0 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(4)* @i32_gv, i32 0, i32 %index
|
||||
%1 = load i32, i32 addrspace(4)* %0
|
||||
store i32 %1, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -44,7 +44,7 @@ entry:
|
||||
|
||||
%struct.foo = type { float, [5 x i32] }
|
||||
|
||||
@struct_foo_gv = internal unnamed_addr addrspace(2) constant [1 x %struct.foo] [ %struct.foo { float 16.0, [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4] } ]
|
||||
@struct_foo_gv = internal unnamed_addr addrspace(4) constant [1 x %struct.foo] [ %struct.foo { float 16.0, [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4] } ]
|
||||
|
||||
; FUNC-LABEL: {{^}}struct_foo_gv_load:
|
||||
; GCN: s_load_dword
|
||||
@ -54,13 +54,13 @@ entry:
|
||||
; EG-NOT: MOVA_INT
|
||||
; EG-NOT: MOV
|
||||
define amdgpu_kernel void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
|
||||
%gep = getelementptr inbounds [1 x %struct.foo], [1 x %struct.foo] addrspace(2)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index
|
||||
%load = load i32, i32 addrspace(2)* %gep, align 4
|
||||
%gep = getelementptr inbounds [1 x %struct.foo], [1 x %struct.foo] addrspace(4)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index
|
||||
%load = load i32, i32 addrspace(4)* %gep, align 4
|
||||
store i32 %load, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
@array_v1_gv = internal addrspace(2) constant [4 x <1 x i32>] [ <1 x i32> <i32 1>,
|
||||
@array_v1_gv = internal addrspace(4) constant [4 x <1 x i32>] [ <1 x i32> <i32 1>,
|
||||
<1 x i32> <i32 2>,
|
||||
<1 x i32> <i32 3>,
|
||||
<1 x i32> <i32 4> ]
|
||||
@ -73,8 +73,8 @@ define amdgpu_kernel void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index
|
||||
; EG-NOT: MOVA_INT
|
||||
; EG-NOT: MOV
|
||||
define amdgpu_kernel void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) {
|
||||
%gep = getelementptr inbounds [4 x <1 x i32>], [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index
|
||||
%load = load <1 x i32>, <1 x i32> addrspace(2)* %gep, align 4
|
||||
%gep = getelementptr inbounds [4 x <1 x i32>], [4 x <1 x i32>] addrspace(4)* @array_v1_gv, i32 0, i32 %index
|
||||
%load = load <1 x i32>, <1 x i32> addrspace(4)* %gep, align 4
|
||||
store <1 x i32> %load, <1 x i32> addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
@ -90,8 +90,8 @@ entry:
|
||||
br i1 %0, label %if, label %else
|
||||
|
||||
if:
|
||||
%1 = getelementptr inbounds [5 x float], [5 x float] addrspace(2)* @float_gv, i32 0, i32 %index
|
||||
%2 = load float, float addrspace(2)* %1
|
||||
%1 = getelementptr inbounds [5 x float], [5 x float] addrspace(4)* @float_gv, i32 0, i32 %index
|
||||
%2 = load float, float addrspace(4)* %1
|
||||
store float %2, float addrspace(1)* %out
|
||||
br label %endif
|
||||
|
||||
|
@ -10,9 +10,9 @@
|
||||
|
||||
; HSA: .globl simple_align16
|
||||
; HSA: .p2align 5
|
||||
define void @simple_align16(i32 addrspace(1)* addrspace(2)* %ptr.out) align 32 {
|
||||
define void @simple_align16(i32 addrspace(1)* addrspace(4)* %ptr.out) align 32 {
|
||||
entry:
|
||||
%out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %ptr.out
|
||||
%out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %ptr.out
|
||||
store i32 0, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
@ -51,9 +51,9 @@
|
||||
; HSA: .size simple, .Lfunc_end0-simple
|
||||
; HSA: ; Function info:
|
||||
; HSA-NOT: COMPUTE_PGM_RSRC2
|
||||
define void @simple(i32 addrspace(1)* addrspace(2)* %ptr.out) {
|
||||
define void @simple(i32 addrspace(1)* addrspace(4)* %ptr.out) {
|
||||
entry:
|
||||
%out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %ptr.out
|
||||
%out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %ptr.out
|
||||
store i32 0, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -61,9 +61,9 @@ entry:
|
||||
; Ignore explicit alignment that is too low.
|
||||
; HSA: .globl simple_align2
|
||||
; HSA: .p2align 2
|
||||
define void @simple_align2(i32 addrspace(1)* addrspace(2)* %ptr.out) align 2 {
|
||||
define void @simple_align2(i32 addrspace(1)* addrspace(4)* %ptr.out) align 2 {
|
||||
entry:
|
||||
%out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %ptr.out
|
||||
%out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %ptr.out
|
||||
store i32 0, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
@ -581,7 +581,7 @@ define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c)
|
||||
; CHECK-NEXT: ValueType: I8
|
||||
; CHECK-NEXT: AddrSpaceQual: Global
|
||||
define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g,
|
||||
i32 addrspace(2)* %c,
|
||||
i32 addrspace(4)* %c,
|
||||
i32 addrspace(3)* %l)
|
||||
!kernel_arg_addr_space !50 !kernel_arg_access_qual !23 !kernel_arg_type !51
|
||||
!kernel_arg_base_type !51 !kernel_arg_type_qual !25 {
|
||||
|
@ -20,21 +20,21 @@ define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg %arg, i32 inreg %arg1
|
||||
%.0.vec.insert = insertelement <2 x i32> undef, i32 %arg2, i32 0
|
||||
%.4.vec.insert = shufflevector <2 x i32> %.0.vec.insert, <2 x i32> %tmp6, <2 x i32> <i32 0, i32 3>
|
||||
%tmp7 = bitcast <2 x i32> %.4.vec.insert to i64
|
||||
%tmp8 = inttoptr i64 %tmp7 to [4294967295 x i8] addrspace(2)*
|
||||
%tmp8 = inttoptr i64 %tmp7 to [4294967295 x i8] addrspace(4)*
|
||||
%tmp9 = add <3 x i32> %arg3, %arg5
|
||||
%tmp10 = getelementptr [4294967295 x i8], [4294967295 x i8] addrspace(2)* %tmp8, i64 0, i64 32
|
||||
%tmp11 = bitcast i8 addrspace(2)* %tmp10 to <8 x i32> addrspace(2)*, !amdgpu.uniform !0
|
||||
%tmp12 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp11, align 16
|
||||
%tmp10 = getelementptr [4294967295 x i8], [4294967295 x i8] addrspace(4)* %tmp8, i64 0, i64 32
|
||||
%tmp11 = bitcast i8 addrspace(4)* %tmp10 to <8 x i32> addrspace(4)*, !amdgpu.uniform !0
|
||||
%tmp12 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp11, align 16
|
||||
%tmp13 = shufflevector <3 x i32> %tmp9, <3 x i32> undef, <2 x i32> <i32 0, i32 1>
|
||||
%tmp14 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %tmp13, <8 x i32> %tmp12, i32 15, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%tmp15 = inttoptr i64 %tmp7 to <8 x i32> addrspace(2)*
|
||||
%tmp16 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp15, align 16
|
||||
%tmp15 = inttoptr i64 %tmp7 to <8 x i32> addrspace(4)*
|
||||
%tmp16 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp15, align 16
|
||||
call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %tmp14, <2 x i32> %tmp13, <8 x i32> %tmp16, i32 15, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%tmp17 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp15, align 16
|
||||
%tmp17 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp15, align 16
|
||||
%tmp18 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %tmp13, <8 x i32> %tmp17, i32 15, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%tmp19 = getelementptr [4294967295 x i8], [4294967295 x i8] addrspace(2)* %tmp8, i64 0, i64 64
|
||||
%tmp20 = bitcast i8 addrspace(2)* %tmp19 to <8 x i32> addrspace(2)*, !amdgpu.uniform !0
|
||||
%tmp21 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp20, align 16
|
||||
%tmp19 = getelementptr [4294967295 x i8], [4294967295 x i8] addrspace(4)* %tmp8, i64 0, i64 64
|
||||
%tmp20 = bitcast i8 addrspace(4)* %tmp19 to <8 x i32> addrspace(4)*, !amdgpu.uniform !0
|
||||
%tmp21 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp20, align 16
|
||||
call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %tmp18, <2 x i32> %tmp13, <8 x i32> %tmp21, i32 15, i1 false, i1 false, i1 false, i1 false) #0
|
||||
ret void
|
||||
}
|
||||
|
@ -10,8 +10,8 @@
|
||||
|
||||
; GFX9-NOT: lshr
|
||||
; GFX9: s_pack_lh_b32_b16 s{{[0-9]+}}, 0x3e7, [[VEC]]
|
||||
define amdgpu_kernel void @s_insertelement_v2i16_0(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr) #0 {
|
||||
%vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
|
||||
define amdgpu_kernel void @s_insertelement_v2i16_0(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr) #0 {
|
||||
%vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
|
||||
%vecins = insertelement <2 x i16> %vec, i16 999, i32 0
|
||||
store <2 x i16> %vecins, <2 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
@ -28,8 +28,8 @@ define amdgpu_kernel void @s_insertelement_v2i16_0(<2 x i16> addrspace(1)* %out,
|
||||
; GFX9-NOT: [[ELT0]]
|
||||
; GFX9-NOT: [[VEC]]
|
||||
; GFX9: s_pack_lh_b32_b16 s{{[0-9]+}}, [[ELT0]], [[VEC]]
|
||||
define amdgpu_kernel void @s_insertelement_v2i16_0_reg(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr, i16 %elt) #0 {
|
||||
%vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
|
||||
define amdgpu_kernel void @s_insertelement_v2i16_0_reg(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr, i16 %elt) #0 {
|
||||
%vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
|
||||
%vecins = insertelement <2 x i16> %vec, i16 %elt, i32 0
|
||||
store <2 x i16> %vecins, <2 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
@ -48,8 +48,8 @@ define amdgpu_kernel void @s_insertelement_v2i16_0_reg(<2 x i16> addrspace(1)* %
|
||||
; GFX9: s_lshr_b32 [[ELT1:s[0-9]+]], [[VEC]], 16
|
||||
; GFX9-DAG: s_pack_ll_b32_b16 s{{[0-9]+}}, [[ELT0]], [[ELT1]]
|
||||
; GFX9-DAG: ; use [[ELT1]]
|
||||
define amdgpu_kernel void @s_insertelement_v2i16_0_multi_use_hi_reg(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr, i16 %elt) #0 {
|
||||
%vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
|
||||
define amdgpu_kernel void @s_insertelement_v2i16_0_multi_use_hi_reg(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr, i16 %elt) #0 {
|
||||
%vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
|
||||
%elt1 = extractelement <2 x i16> %vec, i32 1
|
||||
%vecins = insertelement <2 x i16> %vec, i16 %elt, i32 0
|
||||
store <2 x i16> %vecins, <2 x i16> addrspace(1)* %out
|
||||
@ -68,8 +68,8 @@ define amdgpu_kernel void @s_insertelement_v2i16_0_multi_use_hi_reg(<2 x i16> ad
|
||||
; GFX9-NOT: [[ELT0]]
|
||||
; GFX9-NOT: [[VEC]]
|
||||
; GFX9: s_pack_hh_b32_b16 s{{[0-9]+}}, [[ELT_ARG]], [[VEC]]
|
||||
define amdgpu_kernel void @s_insertelement_v2i16_0_reghi(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr, i32 %elt.arg) #0 {
|
||||
%vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
|
||||
define amdgpu_kernel void @s_insertelement_v2i16_0_reghi(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr, i32 %elt.arg) #0 {
|
||||
%vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
|
||||
%elt.hi = lshr i32 %elt.arg, 16
|
||||
%elt = trunc i32 %elt.hi to i16
|
||||
%vecins = insertelement <2 x i16> %vec, i16 %elt, i32 0
|
||||
@ -88,8 +88,8 @@ define amdgpu_kernel void @s_insertelement_v2i16_0_reghi(<2 x i16> addrspace(1)*
|
||||
; GFX9: s_lshr_b32 [[ELT1:s[0-9]+]], [[ELT_ARG]], 16
|
||||
; GFX9: s_pack_lh_b32_b16 s{{[0-9]+}}, [[ELT1]], [[VEC]]
|
||||
; GFX9: ; use [[ELT1]]
|
||||
define amdgpu_kernel void @s_insertelement_v2i16_0_reghi_multi_use_1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr, i32 %elt.arg) #0 {
|
||||
%vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
|
||||
define amdgpu_kernel void @s_insertelement_v2i16_0_reghi_multi_use_1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr, i32 %elt.arg) #0 {
|
||||
%vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
|
||||
%elt.hi = lshr i32 %elt.arg, 16
|
||||
%elt = trunc i32 %elt.hi to i16
|
||||
%vecins = insertelement <2 x i16> %vec, i16 %elt, i32 0
|
||||
@ -113,8 +113,8 @@ define amdgpu_kernel void @s_insertelement_v2i16_0_reghi_multi_use_1(<2 x i16> a
|
||||
; GFX9: s_pack_ll_b32_b16 s{{[0-9]+}}, [[ELT_HI]], [[VEC_HI]]
|
||||
; GFX9: ; use [[ELT_HI]]
|
||||
; GFX9: ; use [[VEC_HI]]
|
||||
define amdgpu_kernel void @s_insertelement_v2i16_0_reghi_both_multi_use_1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr, i32 %elt.arg) #0 {
|
||||
%vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
|
||||
define amdgpu_kernel void @s_insertelement_v2i16_0_reghi_both_multi_use_1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr, i32 %elt.arg) #0 {
|
||||
%vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
|
||||
%elt.hi = lshr i32 %elt.arg, 16
|
||||
%elt = trunc i32 %elt.hi to i16
|
||||
%vec.hi = extractelement <2 x i16> %vec, i32 1
|
||||
@ -137,8 +137,8 @@ define amdgpu_kernel void @s_insertelement_v2i16_0_reghi_both_multi_use_1(<2 x i
|
||||
; CIVI: s_or_b32 [[INS:s[0-9]+]], [[ELT0]], 0x3e70000
|
||||
|
||||
; GFX9: s_pack_ll_b32_b16 s{{[0-9]+}}, [[VEC]], 0x3e7
|
||||
define amdgpu_kernel void @s_insertelement_v2i16_1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr) #0 {
|
||||
%vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
|
||||
define amdgpu_kernel void @s_insertelement_v2i16_1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr) #0 {
|
||||
%vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
|
||||
%vecins = insertelement <2 x i16> %vec, i16 999, i32 1
|
||||
store <2 x i16> %vecins, <2 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
@ -153,8 +153,8 @@ define amdgpu_kernel void @s_insertelement_v2i16_1(<2 x i16> addrspace(1)* %out,
|
||||
|
||||
; GCN-NOT: shlr
|
||||
; GFX9: s_pack_ll_b32_b16 s{{[0-9]+}}, [[VEC]], [[ELT1]]
|
||||
define amdgpu_kernel void @s_insertelement_v2i16_1_reg(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr, i16 %elt) #0 {
|
||||
%vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
|
||||
define amdgpu_kernel void @s_insertelement_v2i16_1_reg(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr, i16 %elt) #0 {
|
||||
%vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
|
||||
%vecins = insertelement <2 x i16> %vec, i16 %elt, i32 1
|
||||
store <2 x i16> %vecins, <2 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
@ -167,8 +167,8 @@ define amdgpu_kernel void @s_insertelement_v2i16_1_reg(<2 x i16> addrspace(1)* %
|
||||
|
||||
; GFX9: s_lshr_b32 [[ELT1:s[0-9]+]], [[VEC]], 16
|
||||
; GFX9: s_pack_ll_b32_b16 s{{[0-9]+}}, 0x4500, [[ELT1]]
|
||||
define amdgpu_kernel void @s_insertelement_v2f16_0(<2 x half> addrspace(1)* %out, <2 x half> addrspace(2)* %vec.ptr) #0 {
|
||||
%vec = load <2 x half>, <2 x half> addrspace(2)* %vec.ptr
|
||||
define amdgpu_kernel void @s_insertelement_v2f16_0(<2 x half> addrspace(1)* %out, <2 x half> addrspace(4)* %vec.ptr) #0 {
|
||||
%vec = load <2 x half>, <2 x half> addrspace(4)* %vec.ptr
|
||||
%vecins = insertelement <2 x half> %vec, half 5.000000e+00, i32 0
|
||||
store <2 x half> %vecins, <2 x half> addrspace(1)* %out
|
||||
ret void
|
||||
@ -182,8 +182,8 @@ define amdgpu_kernel void @s_insertelement_v2f16_0(<2 x half> addrspace(1)* %out
|
||||
; CIVI: s_or_b32 [[INS:s[0-9]+]], [[ELT0]], 0x45000000
|
||||
|
||||
; GFX9: s_pack_ll_b32_b16 s{{[0-9]+}}, [[VEC]], 0x4500
|
||||
define amdgpu_kernel void @s_insertelement_v2f16_1(<2 x half> addrspace(1)* %out, <2 x half> addrspace(2)* %vec.ptr) #0 {
|
||||
%vec = load <2 x half>, <2 x half> addrspace(2)* %vec.ptr
|
||||
define amdgpu_kernel void @s_insertelement_v2f16_1(<2 x half> addrspace(1)* %out, <2 x half> addrspace(4)* %vec.ptr) #0 {
|
||||
%vec = load <2 x half>, <2 x half> addrspace(4)* %vec.ptr
|
||||
%vecins = insertelement <2 x half> %vec, half 5.000000e+00, i32 1
|
||||
store <2 x half> %vecins, <2 x half> addrspace(1)* %out
|
||||
ret void
|
||||
@ -399,9 +399,9 @@ define amdgpu_kernel void @v_insertelement_v2f16_1_inlineimm(<2 x half> addrspac
|
||||
; GCN-DAG: s_lshl_b32 [[MASK:s[0-9]+]], 0xffff, [[SCALED_IDX]]
|
||||
; GCN: v_bfi_b32 [[RESULT:v[0-9]+]], [[MASK]], [[K]], [[VVEC]]
|
||||
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
define amdgpu_kernel void @s_insertelement_v2i16_dynamic(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr, i32 addrspace(2)* %idx.ptr) #0 {
|
||||
%idx = load volatile i32, i32 addrspace(2)* %idx.ptr
|
||||
%vec = load <2 x i16>, <2 x i16> addrspace(2)* %vec.ptr
|
||||
define amdgpu_kernel void @s_insertelement_v2i16_dynamic(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr, i32 addrspace(4)* %idx.ptr) #0 {
|
||||
%idx = load volatile i32, i32 addrspace(4)* %idx.ptr
|
||||
%vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
|
||||
%vecins = insertelement <2 x i16> %vec, i16 999, i32 %idx
|
||||
store <2 x i16> %vecins, <2 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
|
@ -22,8 +22,8 @@ define amdgpu_kernel void @test_merge_store_constant_i16_invariant_global_pointe
|
||||
; GCN: s_load_dwordx2 s{{\[}}[[SPTR_LO:[0-9]+]]:[[SPTR_HI:[0-9]+]]{{\]}}
|
||||
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x1c8007b
|
||||
; GCN: buffer_store_dword [[K]], off, s{{\[}}[[SPTR_LO]]:
|
||||
define amdgpu_kernel void @test_merge_store_constant_i16_invariant_constant_pointer_load(i16 addrspace(1)* addrspace(2)* dereferenceable(4096) nonnull %in) #0 {
|
||||
%ptr = load i16 addrspace(1)*, i16 addrspace(1)* addrspace(2)* %in, !invariant.load !0
|
||||
define amdgpu_kernel void @test_merge_store_constant_i16_invariant_constant_pointer_load(i16 addrspace(1)* addrspace(4)* dereferenceable(4096) nonnull %in) #0 {
|
||||
%ptr = load i16 addrspace(1)*, i16 addrspace(1)* addrspace(4)* %in, !invariant.load !0
|
||||
%ptr.1 = getelementptr i16, i16 addrspace(1)* %ptr, i64 1
|
||||
store i16 123, i16 addrspace(1)* %ptr, align 4
|
||||
store i16 456, i16 addrspace(1)* %ptr.1
|
||||
|
@ -14,10 +14,10 @@
|
||||
; CHECK: s_movk_i32 [[K:s[0-9]+]], 0x4d2 ; encoding
|
||||
; CHECK: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, [[K]] idxen offen offset:65535 glc slc
|
||||
|
||||
define amdgpu_vs void @main([17 x <4 x i32>] addrspace(2)* byval %arg, [32 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <32 x i8>] addrspace(2)* byval %arg2, [2 x <4 x i32>] addrspace(2)* byval %arg3, [17 x <4 x i32>] addrspace(2)* inreg %arg4, [17 x <4 x i32>] addrspace(2)* inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9) {
|
||||
define amdgpu_vs void @main([17 x <4 x i32>] addrspace(4)* byval %arg, [32 x <4 x i32>] addrspace(4)* byval %arg1, [16 x <32 x i8>] addrspace(4)* byval %arg2, [2 x <4 x i32>] addrspace(4)* byval %arg3, [17 x <4 x i32>] addrspace(4)* inreg %arg4, [17 x <4 x i32>] addrspace(4)* inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9) {
|
||||
main_body:
|
||||
%tmp = getelementptr [2 x <4 x i32>], [2 x <4 x i32>] addrspace(2)* %arg3, i64 0, i32 1
|
||||
%tmp10 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
|
||||
%tmp = getelementptr [2 x <4 x i32>], [2 x <4 x i32>] addrspace(4)* %arg3, i64 0, i32 1
|
||||
%tmp10 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0
|
||||
%tmp11 = shl i32 %arg6, 2
|
||||
%tmp12 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32> %tmp10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0)
|
||||
%tmp13 = bitcast i32 %tmp12 to float
|
||||
|
@ -7,13 +7,13 @@
|
||||
; GCN: enable_sgpr_dispatch_ptr = 1
|
||||
; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
|
||||
define amdgpu_kernel void @test(i32 addrspace(1)* %out) {
|
||||
%dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
|
||||
%header_ptr = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
|
||||
%value = load i32, i32 addrspace(2)* %header_ptr
|
||||
%dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
|
||||
%header_ptr = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
|
||||
%value = load i32, i32 addrspace(4)* %header_ptr
|
||||
store i32 %value, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
declare noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
|
||||
declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
|
||||
|
||||
attributes #0 = { readnone }
|
||||
|
@ -2,23 +2,23 @@
|
||||
|
||||
; ERROR: in function test_kernel{{.*}}: non-hsa intrinsic with hsa target
|
||||
define amdgpu_kernel void @test_kernel(i32 addrspace(1)* %out) #1 {
|
||||
%implicit_buffer_ptr = call i8 addrspace(2)* @llvm.amdgcn.implicit.buffer.ptr()
|
||||
%header_ptr = bitcast i8 addrspace(2)* %implicit_buffer_ptr to i32 addrspace(2)*
|
||||
%value = load i32, i32 addrspace(2)* %header_ptr
|
||||
%implicit_buffer_ptr = call i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr()
|
||||
%header_ptr = bitcast i8 addrspace(4)* %implicit_buffer_ptr to i32 addrspace(4)*
|
||||
%value = load i32, i32 addrspace(4)* %header_ptr
|
||||
store i32 %value, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; ERROR: in function test_func{{.*}}: non-hsa intrinsic with hsa target
|
||||
define void @test_func(i32 addrspace(1)* %out) #1 {
|
||||
%implicit_buffer_ptr = call i8 addrspace(2)* @llvm.amdgcn.implicit.buffer.ptr()
|
||||
%header_ptr = bitcast i8 addrspace(2)* %implicit_buffer_ptr to i32 addrspace(2)*
|
||||
%value = load i32, i32 addrspace(2)* %header_ptr
|
||||
%implicit_buffer_ptr = call i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr()
|
||||
%header_ptr = bitcast i8 addrspace(4)* %implicit_buffer_ptr to i32 addrspace(4)*
|
||||
%value = load i32, i32 addrspace(4)* %header_ptr
|
||||
store i32 %value, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i8 addrspace(2)* @llvm.amdgcn.implicit.buffer.ptr() #0
|
||||
declare i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr() #0
|
||||
|
||||
attributes #0 = { nounwind readnone speculatable }
|
||||
attributes #1 = { nounwind }
|
||||
|
@ -10,9 +10,9 @@
|
||||
define amdgpu_ps i32 @test_ps() #1 {
|
||||
%alloca = alloca i32, addrspace(5)
|
||||
store volatile i32 0, i32 addrspace(5)* %alloca
|
||||
%implicit_buffer_ptr = call i8 addrspace(2)* @llvm.amdgcn.implicit.buffer.ptr()
|
||||
%buffer_ptr = bitcast i8 addrspace(2)* %implicit_buffer_ptr to i32 addrspace(2)*
|
||||
%value = load volatile i32, i32 addrspace(2)* %buffer_ptr
|
||||
%implicit_buffer_ptr = call i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr()
|
||||
%buffer_ptr = bitcast i8 addrspace(4)* %implicit_buffer_ptr to i32 addrspace(4)*
|
||||
%value = load volatile i32, i32 addrspace(4)* %buffer_ptr
|
||||
ret i32 %value
|
||||
}
|
||||
|
||||
@ -23,13 +23,13 @@ define amdgpu_ps i32 @test_ps() #1 {
|
||||
define amdgpu_cs i32 @test_cs() #1 {
|
||||
%alloca = alloca i32, addrspace(5)
|
||||
store volatile i32 0, i32 addrspace(5)* %alloca
|
||||
%implicit_buffer_ptr = call i8 addrspace(2)* @llvm.amdgcn.implicit.buffer.ptr()
|
||||
%buffer_ptr = bitcast i8 addrspace(2)* %implicit_buffer_ptr to i32 addrspace(2)*
|
||||
%value = load volatile i32, i32 addrspace(2)* %buffer_ptr
|
||||
%implicit_buffer_ptr = call i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr()
|
||||
%buffer_ptr = bitcast i8 addrspace(4)* %implicit_buffer_ptr to i32 addrspace(4)*
|
||||
%value = load volatile i32, i32 addrspace(4)* %buffer_ptr
|
||||
ret i32 %value
|
||||
}
|
||||
|
||||
declare i8 addrspace(2)* @llvm.amdgcn.implicit.buffer.ptr() #0
|
||||
declare i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr() #0
|
||||
|
||||
attributes #0 = { nounwind readnone speculatable }
|
||||
attributes #1 = { nounwind }
|
||||
|
@ -11,9 +11,9 @@
|
||||
|
||||
; HSA: s_load_dword s0, s[4:5], 0x0
|
||||
define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
|
||||
%implicitarg.ptr = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%cast = bitcast i8 addrspace(2)* %implicitarg.ptr to i32 addrspace(2)*
|
||||
%load = load volatile i32, i32 addrspace(2)* %cast
|
||||
%implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
|
||||
%load = load volatile i32, i32 addrspace(4)* %cast
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -26,9 +26,9 @@ define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
|
||||
|
||||
; HSA: s_load_dword s0, s[4:5], 0x1c
|
||||
define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 {
|
||||
%implicitarg.ptr = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%cast = bitcast i8 addrspace(2)* %implicitarg.ptr to i32 addrspace(2)*
|
||||
%load = load volatile i32, i32 addrspace(2)* %cast
|
||||
%implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
|
||||
%load = load volatile i32, i32 addrspace(4)* %cast
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -38,9 +38,9 @@ define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 {
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define void @func_implicitarg_ptr() #1 {
|
||||
%implicitarg.ptr = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%cast = bitcast i8 addrspace(2)* %implicitarg.ptr to i32 addrspace(2)*
|
||||
%load = load volatile i32, i32 addrspace(2)* %cast
|
||||
%implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
|
||||
%load = load volatile i32, i32 addrspace(4)* %cast
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -86,12 +86,12 @@ define void @func_call_implicitarg_ptr_func() #1 {
|
||||
; GCN: s_load_dword s{{[0-9]+}}, s[6:7], 0x0{{$}}
|
||||
; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0{{$}}
|
||||
define void @func_kernarg_implicitarg_ptr() #1 {
|
||||
%kernarg.segment.ptr = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
|
||||
%implicitarg.ptr = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%cast.kernarg.segment.ptr = bitcast i8 addrspace(2)* %kernarg.segment.ptr to i32 addrspace(2)*
|
||||
%cast.implicitarg = bitcast i8 addrspace(2)* %implicitarg.ptr to i32 addrspace(2)*
|
||||
%load0 = load volatile i32, i32 addrspace(2)* %cast.kernarg.segment.ptr
|
||||
%load1 = load volatile i32, i32 addrspace(2)* %cast.implicitarg
|
||||
%kernarg.segment.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
||||
%implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%cast.kernarg.segment.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)*
|
||||
%cast.implicitarg = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
|
||||
%load0 = load volatile i32, i32 addrspace(4)* %cast.kernarg.segment.ptr
|
||||
%load1 = load volatile i32, i32 addrspace(4)* %cast.implicitarg
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -106,8 +106,8 @@ define amdgpu_kernel void @kernel_call_kernarg_implicitarg_ptr_func([112 x i8])
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() #2
|
||||
declare i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #2
|
||||
declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #2
|
||||
declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #2
|
||||
|
||||
attributes #0 = { nounwind noinline }
|
||||
attributes #1 = { nounwind noinline }
|
||||
|
@ -11,10 +11,10 @@
|
||||
|
||||
; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[0:1], 0xa
|
||||
define amdgpu_kernel void @test(i32 addrspace(1)* %out) #1 {
|
||||
%kernarg.segment.ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
|
||||
%header.ptr = bitcast i8 addrspace(2)* %kernarg.segment.ptr to i32 addrspace(2)*
|
||||
%gep = getelementptr i32, i32 addrspace(2)* %header.ptr, i64 10
|
||||
%value = load i32, i32 addrspace(2)* %gep
|
||||
%kernarg.segment.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
||||
%header.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)*
|
||||
%gep = getelementptr i32, i32 addrspace(4)* %header.ptr, i64 10
|
||||
%value = load i32, i32 addrspace(4)* %gep
|
||||
store i32 %value, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -23,10 +23,10 @@ define amdgpu_kernel void @test(i32 addrspace(1)* %out) #1 {
|
||||
; 10 + 9 (36 prepended implicit bytes) + 2(out pointer) = 21 = 0x15
|
||||
; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[0:1], 0x15
|
||||
define amdgpu_kernel void @test_implicit(i32 addrspace(1)* %out) #1 {
|
||||
%implicitarg.ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%header.ptr = bitcast i8 addrspace(2)* %implicitarg.ptr to i32 addrspace(2)*
|
||||
%gep = getelementptr i32, i32 addrspace(2)* %header.ptr, i64 10
|
||||
%value = load i32, i32 addrspace(2)* %gep
|
||||
%implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%header.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
|
||||
%gep = getelementptr i32, i32 addrspace(4)* %header.ptr, i64 10
|
||||
%value = load i32, i32 addrspace(4)* %gep
|
||||
store i32 %value, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -42,9 +42,9 @@ define amdgpu_kernel void @test_implicit(i32 addrspace(1)* %out) #1 {
|
||||
; MESA: buffer_store_dword [[V_VAL]]
|
||||
; HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]]
|
||||
define amdgpu_kernel void @test_implicit_alignment(i32 addrspace(1)* %out, <2 x i8> %in) #1 {
|
||||
%implicitarg.ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%arg.ptr = bitcast i8 addrspace(2)* %implicitarg.ptr to i32 addrspace(2)*
|
||||
%val = load i32, i32 addrspace(2)* %arg.ptr
|
||||
%implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%arg.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
|
||||
%val = load i32, i32 addrspace(4)* %arg.ptr
|
||||
store i32 %val, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -53,16 +53,16 @@ define amdgpu_kernel void @test_implicit_alignment(i32 addrspace(1)* %out, <2 x
|
||||
; HSA: enable_sgpr_kernarg_segment_ptr = 1
|
||||
; HSA: s_load_dword s{{[0-9]+}}, s[4:5]
|
||||
define amdgpu_kernel void @test_no_kernargs() #1 {
|
||||
%kernarg.segment.ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
|
||||
%header.ptr = bitcast i8 addrspace(2)* %kernarg.segment.ptr to i32 addrspace(2)*
|
||||
%gep = getelementptr i32, i32 addrspace(2)* %header.ptr, i64 10
|
||||
%value = load i32, i32 addrspace(2)* %gep
|
||||
%kernarg.segment.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
||||
%header.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)*
|
||||
%gep = getelementptr i32, i32 addrspace(4)* %header.ptr, i64 10
|
||||
%value = load i32, i32 addrspace(4)* %gep
|
||||
store volatile i32 %value, i32 addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
|
||||
declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() #0
|
||||
declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
|
||||
declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { nounwind }
|
||||
|
@ -7,13 +7,13 @@
|
||||
; GCN: enable_sgpr_queue_ptr = 1
|
||||
; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
|
||||
define amdgpu_kernel void @test(i32 addrspace(1)* %out) {
|
||||
%queue_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
|
||||
%header_ptr = bitcast i8 addrspace(2)* %queue_ptr to i32 addrspace(2)*
|
||||
%value = load i32, i32 addrspace(2)* %header_ptr
|
||||
%queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
|
||||
%header_ptr = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
|
||||
%value = load i32, i32 addrspace(4)* %header_ptr
|
||||
store i32 %value, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
declare noalias i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
|
||||
declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
|
@ -3,7 +3,7 @@
|
||||
|
||||
declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i1) nounwind
|
||||
declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i1) nounwind
|
||||
declare void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(2)* nocapture, i64, i1) nounwind
|
||||
declare void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(4)* nocapture, i64, i1) nounwind
|
||||
|
||||
|
||||
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align1:
|
||||
@ -328,8 +328,8 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align16(i64 ad
|
||||
}
|
||||
|
||||
; Test shouldConvertConstantLoadToIntImm
|
||||
@hello.align4 = private unnamed_addr addrspace(2) constant [16 x i8] c"constant string\00", align 4
|
||||
@hello.align1 = private unnamed_addr addrspace(2) constant [16 x i8] c"constant string\00", align 1
|
||||
@hello.align4 = private unnamed_addr addrspace(4) constant [16 x i8] c"constant string\00", align 4
|
||||
@hello.align1 = private unnamed_addr addrspace(4) constant [16 x i8] c"constant string\00", align 1
|
||||
|
||||
; FUNC-LABEL: {{^}}test_memcpy_const_string_align4:
|
||||
; SI: s_getpc_b64
|
||||
@ -341,8 +341,8 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align16(i64 ad
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
define amdgpu_kernel void @test_memcpy_const_string_align4(i8 addrspace(1)* noalias %out) nounwind {
|
||||
%str = bitcast [16 x i8] addrspace(2)* @hello.align4 to i8 addrspace(2)*
|
||||
call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* align 4 %out, i8 addrspace(2)* align 4 %str, i64 32, i1 false)
|
||||
%str = bitcast [16 x i8] addrspace(4)* @hello.align4 to i8 addrspace(4)*
|
||||
call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* align 4 %out, i8 addrspace(4)* align 4 %str, i64 32, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -366,7 +366,7 @@ define amdgpu_kernel void @test_memcpy_const_string_align4(i8 addrspace(1)* noal
|
||||
; SI: buffer_store_byte
|
||||
; SI: buffer_store_byte
|
||||
define amdgpu_kernel void @test_memcpy_const_string_align1(i8 addrspace(1)* noalias %out) nounwind {
|
||||
%str = bitcast [16 x i8] addrspace(2)* @hello.align1 to i8 addrspace(2)*
|
||||
call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* %out, i8 addrspace(2)* %str, i64 32, i1 false)
|
||||
%str = bitcast [16 x i8] addrspace(4)* @hello.align1 to i8 addrspace(4)*
|
||||
call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* %out, i8 addrspace(4)* %str, i64 32, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
@ -6,8 +6,8 @@
|
||||
; GCN: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}]
|
||||
; GCN-NOHSA: buffer_store_dwordx2
|
||||
; GCN-HSA: flat_store_dwordx2
|
||||
define amdgpu_kernel void @constant_load_f64(double addrspace(1)* %out, double addrspace(2)* %in) #0 {
|
||||
%ld = load double, double addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_load_f64(double addrspace(1)* %out, double addrspace(4)* %in) #0 {
|
||||
%ld = load double, double addrspace(4)* %in
|
||||
store double %ld, double addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
@ -9,57 +9,57 @@
|
||||
|
||||
; EG: VTX_READ_8
|
||||
; EG: AND_INT
|
||||
define amdgpu_kernel void @constant_load_i1(i1 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 {
|
||||
%load = load i1, i1 addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_load_i1(i1 addrspace(1)* %out, i1 addrspace(4)* nocapture %in) #0 {
|
||||
%load = load i1, i1 addrspace(4)* %in
|
||||
store i1 %load, i1 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_load_v2i1:
|
||||
define amdgpu_kernel void @constant_load_v2i1(<2 x i1> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <2 x i1>, <2 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_load_v2i1(<2 x i1> addrspace(1)* %out, <2 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <2 x i1>, <2 x i1> addrspace(4)* %in
|
||||
store <2 x i1> %load, <2 x i1> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_load_v3i1:
|
||||
define amdgpu_kernel void @constant_load_v3i1(<3 x i1> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <3 x i1>, <3 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_load_v3i1(<3 x i1> addrspace(1)* %out, <3 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <3 x i1>, <3 x i1> addrspace(4)* %in
|
||||
store <3 x i1> %load, <3 x i1> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_load_v4i1:
|
||||
define amdgpu_kernel void @constant_load_v4i1(<4 x i1> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <4 x i1>, <4 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_load_v4i1(<4 x i1> addrspace(1)* %out, <4 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <4 x i1>, <4 x i1> addrspace(4)* %in
|
||||
store <4 x i1> %load, <4 x i1> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_load_v8i1:
|
||||
define amdgpu_kernel void @constant_load_v8i1(<8 x i1> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <8 x i1>, <8 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_load_v8i1(<8 x i1> addrspace(1)* %out, <8 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <8 x i1>, <8 x i1> addrspace(4)* %in
|
||||
store <8 x i1> %load, <8 x i1> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_load_v16i1:
|
||||
define amdgpu_kernel void @constant_load_v16i1(<16 x i1> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <16 x i1>, <16 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_load_v16i1(<16 x i1> addrspace(1)* %out, <16 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <16 x i1>, <16 x i1> addrspace(4)* %in
|
||||
store <16 x i1> %load, <16 x i1> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_load_v32i1:
|
||||
define amdgpu_kernel void @constant_load_v32i1(<32 x i1> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <32 x i1>, <32 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_load_v32i1(<32 x i1> addrspace(1)* %out, <32 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <32 x i1>, <32 x i1> addrspace(4)* %in
|
||||
store <32 x i1> %load, <32 x i1> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_load_v64i1:
|
||||
define amdgpu_kernel void @constant_load_v64i1(<64 x i1> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <64 x i1>, <64 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_load_v64i1(<64 x i1> addrspace(1)* %out, <64 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <64 x i1>, <64 x i1> addrspace(4)* %in
|
||||
store <64 x i1> %load, <64 x i1> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -67,8 +67,8 @@ define amdgpu_kernel void @constant_load_v64i1(<64 x i1> addrspace(1)* %out, <64
|
||||
; FUNC-LABEL: {{^}}constant_zextload_i1_to_i32:
|
||||
; GCN: buffer_load_ubyte
|
||||
; GCN: buffer_store_dword
|
||||
define amdgpu_kernel void @constant_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 {
|
||||
%a = load i1, i1 addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(4)* nocapture %in) #0 {
|
||||
%a = load i1, i1 addrspace(4)* %in
|
||||
%ext = zext i1 %a to i32
|
||||
store i32 %ext, i32 addrspace(1)* %out
|
||||
ret void
|
||||
@ -81,136 +81,136 @@ define amdgpu_kernel void @constant_zextload_i1_to_i32(i32 addrspace(1)* %out, i
|
||||
|
||||
; EG: VTX_READ_8
|
||||
; EG: BFE_INT
|
||||
define amdgpu_kernel void @constant_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 {
|
||||
%a = load i1, i1 addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(4)* nocapture %in) #0 {
|
||||
%a = load i1, i1 addrspace(4)* %in
|
||||
%ext = sext i1 %a to i32
|
||||
store i32 %ext, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_zextload_v1i1_to_v1i32:
|
||||
define amdgpu_kernel void @constant_zextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <1 x i1>, <1 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <1 x i1>, <1 x i1> addrspace(4)* %in
|
||||
%ext = zext <1 x i1> %load to <1 x i32>
|
||||
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_sextload_v1i1_to_v1i32:
|
||||
define amdgpu_kernel void @constant_sextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <1 x i1>, <1 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <1 x i1>, <1 x i1> addrspace(4)* %in
|
||||
%ext = sext <1 x i1> %load to <1 x i32>
|
||||
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_zextload_v2i1_to_v2i32:
|
||||
define amdgpu_kernel void @constant_zextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <2 x i1>, <2 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <2 x i1>, <2 x i1> addrspace(4)* %in
|
||||
%ext = zext <2 x i1> %load to <2 x i32>
|
||||
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_sextload_v2i1_to_v2i32:
|
||||
define amdgpu_kernel void @constant_sextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <2 x i1>, <2 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <2 x i1>, <2 x i1> addrspace(4)* %in
|
||||
%ext = sext <2 x i1> %load to <2 x i32>
|
||||
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_zextload_v3i1_to_v3i32:
|
||||
define amdgpu_kernel void @constant_zextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <3 x i1>, <3 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <3 x i1>, <3 x i1> addrspace(4)* %in
|
||||
%ext = zext <3 x i1> %load to <3 x i32>
|
||||
store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_sextload_v3i1_to_v3i32:
|
||||
define amdgpu_kernel void @constant_sextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <3 x i1>, <3 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <3 x i1>, <3 x i1> addrspace(4)* %in
|
||||
%ext = sext <3 x i1> %load to <3 x i32>
|
||||
store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_zextload_v4i1_to_v4i32:
|
||||
define amdgpu_kernel void @constant_zextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <4 x i1>, <4 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <4 x i1>, <4 x i1> addrspace(4)* %in
|
||||
%ext = zext <4 x i1> %load to <4 x i32>
|
||||
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_sextload_v4i1_to_v4i32:
|
||||
define amdgpu_kernel void @constant_sextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <4 x i1>, <4 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <4 x i1>, <4 x i1> addrspace(4)* %in
|
||||
%ext = sext <4 x i1> %load to <4 x i32>
|
||||
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_zextload_v8i1_to_v8i32:
|
||||
define amdgpu_kernel void @constant_zextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <8 x i1>, <8 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <8 x i1>, <8 x i1> addrspace(4)* %in
|
||||
%ext = zext <8 x i1> %load to <8 x i32>
|
||||
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_sextload_v8i1_to_v8i32:
|
||||
define amdgpu_kernel void @constant_sextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <8 x i1>, <8 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <8 x i1>, <8 x i1> addrspace(4)* %in
|
||||
%ext = sext <8 x i1> %load to <8 x i32>
|
||||
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_zextload_v16i1_to_v16i32:
|
||||
define amdgpu_kernel void @constant_zextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <16 x i1>, <16 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <16 x i1>, <16 x i1> addrspace(4)* %in
|
||||
%ext = zext <16 x i1> %load to <16 x i32>
|
||||
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_sextload_v16i1_to_v16i32:
|
||||
define amdgpu_kernel void @constant_sextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <16 x i1>, <16 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <16 x i1>, <16 x i1> addrspace(4)* %in
|
||||
%ext = sext <16 x i1> %load to <16 x i32>
|
||||
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_zextload_v32i1_to_v32i32:
|
||||
define amdgpu_kernel void @constant_zextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <32 x i1>, <32 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <32 x i1>, <32 x i1> addrspace(4)* %in
|
||||
%ext = zext <32 x i1> %load to <32 x i32>
|
||||
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_sextload_v32i1_to_v32i32:
|
||||
define amdgpu_kernel void @constant_sextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <32 x i1>, <32 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <32 x i1>, <32 x i1> addrspace(4)* %in
|
||||
%ext = sext <32 x i1> %load to <32 x i32>
|
||||
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_zextload_v64i1_to_v64i32:
|
||||
define amdgpu_kernel void @constant_zextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <64 x i1>, <64 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <64 x i1>, <64 x i1> addrspace(4)* %in
|
||||
%ext = zext <64 x i1> %load to <64 x i32>
|
||||
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_sextload_v64i1_to_v64i32:
|
||||
define amdgpu_kernel void @constant_sextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <64 x i1>, <64 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <64 x i1>, <64 x i1> addrspace(4)* %in
|
||||
%ext = sext <64 x i1> %load to <64 x i32>
|
||||
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -221,8 +221,8 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i32(<64 x i32> addrspac
|
||||
; GCN-DAG: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
|
||||
; GCN-DAG: v_and_b32_e32 {{v[0-9]+}}, 1, [[LOAD]]
|
||||
; GCN: buffer_store_dwordx2
|
||||
define amdgpu_kernel void @constant_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 {
|
||||
%a = load i1, i1 addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(4)* nocapture %in) #0 {
|
||||
%a = load i1, i1 addrspace(4)* %in
|
||||
%ext = zext i1 %a to i64
|
||||
store i64 %ext, i64 addrspace(1)* %out
|
||||
ret void
|
||||
@ -233,136 +233,136 @@ define amdgpu_kernel void @constant_zextload_i1_to_i64(i64 addrspace(1)* %out, i
|
||||
; GCN: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}}
|
||||
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]]
|
||||
; GCN: buffer_store_dwordx2
|
||||
define amdgpu_kernel void @constant_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 {
|
||||
%a = load i1, i1 addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(4)* nocapture %in) #0 {
|
||||
%a = load i1, i1 addrspace(4)* %in
|
||||
%ext = sext i1 %a to i64
|
||||
store i64 %ext, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_zextload_v1i1_to_v1i64:
|
||||
define amdgpu_kernel void @constant_zextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <1 x i1>, <1 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <1 x i1>, <1 x i1> addrspace(4)* %in
|
||||
%ext = zext <1 x i1> %load to <1 x i64>
|
||||
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_sextload_v1i1_to_v1i64:
|
||||
define amdgpu_kernel void @constant_sextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <1 x i1>, <1 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <1 x i1>, <1 x i1> addrspace(4)* %in
|
||||
%ext = sext <1 x i1> %load to <1 x i64>
|
||||
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_zextload_v2i1_to_v2i64:
|
||||
define amdgpu_kernel void @constant_zextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <2 x i1>, <2 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <2 x i1>, <2 x i1> addrspace(4)* %in
|
||||
%ext = zext <2 x i1> %load to <2 x i64>
|
||||
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_sextload_v2i1_to_v2i64:
|
||||
define amdgpu_kernel void @constant_sextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <2 x i1>, <2 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <2 x i1>, <2 x i1> addrspace(4)* %in
|
||||
%ext = sext <2 x i1> %load to <2 x i64>
|
||||
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_zextload_v3i1_to_v3i64:
|
||||
define amdgpu_kernel void @constant_zextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <3 x i1>, <3 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <3 x i1>, <3 x i1> addrspace(4)* %in
|
||||
%ext = zext <3 x i1> %load to <3 x i64>
|
||||
store <3 x i64> %ext, <3 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_sextload_v3i1_to_v3i64:
|
||||
define amdgpu_kernel void @constant_sextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <3 x i1>, <3 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <3 x i1>, <3 x i1> addrspace(4)* %in
|
||||
%ext = sext <3 x i1> %load to <3 x i64>
|
||||
store <3 x i64> %ext, <3 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_zextload_v4i1_to_v4i64:
|
||||
define amdgpu_kernel void @constant_zextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <4 x i1>, <4 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <4 x i1>, <4 x i1> addrspace(4)* %in
|
||||
%ext = zext <4 x i1> %load to <4 x i64>
|
||||
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_sextload_v4i1_to_v4i64:
|
||||
define amdgpu_kernel void @constant_sextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <4 x i1>, <4 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <4 x i1>, <4 x i1> addrspace(4)* %in
|
||||
%ext = sext <4 x i1> %load to <4 x i64>
|
||||
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_zextload_v8i1_to_v8i64:
|
||||
define amdgpu_kernel void @constant_zextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <8 x i1>, <8 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <8 x i1>, <8 x i1> addrspace(4)* %in
|
||||
%ext = zext <8 x i1> %load to <8 x i64>
|
||||
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_sextload_v8i1_to_v8i64:
|
||||
define amdgpu_kernel void @constant_sextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <8 x i1>, <8 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <8 x i1>, <8 x i1> addrspace(4)* %in
|
||||
%ext = sext <8 x i1> %load to <8 x i64>
|
||||
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_zextload_v16i1_to_v16i64:
|
||||
define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <16 x i1>, <16 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <16 x i1>, <16 x i1> addrspace(4)* %in
|
||||
%ext = zext <16 x i1> %load to <16 x i64>
|
||||
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_sextload_v16i1_to_v16i64:
|
||||
define amdgpu_kernel void @constant_sextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <16 x i1>, <16 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <16 x i1>, <16 x i1> addrspace(4)* %in
|
||||
%ext = sext <16 x i1> %load to <16 x i64>
|
||||
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_zextload_v32i1_to_v32i64:
|
||||
define amdgpu_kernel void @constant_zextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <32 x i1>, <32 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <32 x i1>, <32 x i1> addrspace(4)* %in
|
||||
%ext = zext <32 x i1> %load to <32 x i64>
|
||||
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_sextload_v32i1_to_v32i64:
|
||||
define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <32 x i1>, <32 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <32 x i1>, <32 x i1> addrspace(4)* %in
|
||||
%ext = sext <32 x i1> %load to <32 x i64>
|
||||
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_zextload_v64i1_to_v64i64:
|
||||
define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <64 x i1>, <64 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <64 x i1>, <64 x i1> addrspace(4)* %in
|
||||
%ext = zext <64 x i1> %load to <64 x i64>
|
||||
store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_sextload_v64i1_to_v64i64:
|
||||
define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 {
|
||||
%load = load <64 x i1>, <64 x i1> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(4)* nocapture %in) #0 {
|
||||
%load = load <64 x i1>, <64 x i1> addrspace(4)* %in
|
||||
%ext = sext <64 x i1> %load to <64 x i64>
|
||||
store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
|
@ -8,9 +8,9 @@
|
||||
; GCN-HSA: flat_load_ushort
|
||||
|
||||
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
|
||||
define amdgpu_kernel void @constant_load_i16(i16 addrspace(1)* %out, i16 addrspace(2)* %in) {
|
||||
define amdgpu_kernel void @constant_load_i16(i16 addrspace(1)* %out, i16 addrspace(4)* %in) {
|
||||
entry:
|
||||
%ld = load i16, i16 addrspace(2)* %in
|
||||
%ld = load i16, i16 addrspace(4)* %in
|
||||
store i16 %ld, i16 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -19,9 +19,9 @@ entry:
|
||||
; GCN: s_load_dword s
|
||||
|
||||
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
|
||||
define amdgpu_kernel void @constant_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) {
|
||||
define amdgpu_kernel void @constant_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) {
|
||||
entry:
|
||||
%ld = load <2 x i16>, <2 x i16> addrspace(2)* %in
|
||||
%ld = load <2 x i16>, <2 x i16> addrspace(4)* %in
|
||||
store <2 x i16> %ld, <2 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -31,9 +31,9 @@ entry:
|
||||
|
||||
; EG-DAG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
|
||||
; EG-DAG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 4, #1
|
||||
define amdgpu_kernel void @constant_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(2)* %in) {
|
||||
define amdgpu_kernel void @constant_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) {
|
||||
entry:
|
||||
%ld = load <3 x i16>, <3 x i16> addrspace(2)* %in
|
||||
%ld = load <3 x i16>, <3 x i16> addrspace(4)* %in
|
||||
store <3 x i16> %ld, <3 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -42,9 +42,9 @@ entry:
|
||||
; GCN: s_load_dwordx2
|
||||
|
||||
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
|
||||
define amdgpu_kernel void @constant_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) {
|
||||
define amdgpu_kernel void @constant_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) {
|
||||
entry:
|
||||
%ld = load <4 x i16>, <4 x i16> addrspace(2)* %in
|
||||
%ld = load <4 x i16>, <4 x i16> addrspace(4)* %in
|
||||
store <4 x i16> %ld, <4 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -53,9 +53,9 @@ entry:
|
||||
; GCN: s_load_dwordx4
|
||||
|
||||
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
|
||||
define amdgpu_kernel void @constant_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) {
|
||||
define amdgpu_kernel void @constant_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) {
|
||||
entry:
|
||||
%ld = load <8 x i16>, <8 x i16> addrspace(2)* %in
|
||||
%ld = load <8 x i16>, <8 x i16> addrspace(4)* %in
|
||||
store <8 x i16> %ld, <8 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -65,9 +65,9 @@ entry:
|
||||
|
||||
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
|
||||
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
|
||||
define amdgpu_kernel void @constant_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) {
|
||||
define amdgpu_kernel void @constant_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) {
|
||||
entry:
|
||||
%ld = load <16 x i16>, <16 x i16> addrspace(2)* %in
|
||||
%ld = load <16 x i16>, <16 x i16> addrspace(4)* %in
|
||||
store <16 x i16> %ld, <16 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -80,8 +80,8 @@ entry:
|
||||
; GCN-HSA: flat_store_dword
|
||||
|
||||
; EG: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}, 0, #1
|
||||
define amdgpu_kernel void @constant_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
|
||||
%a = load i16, i16 addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
|
||||
%a = load i16, i16 addrspace(4)* %in
|
||||
%ext = zext i16 %a to i32
|
||||
store i32 %ext, i32 addrspace(1)* %out
|
||||
ret void
|
||||
@ -97,8 +97,8 @@ define amdgpu_kernel void @constant_zextload_i16_to_i32(i32 addrspace(1)* %out,
|
||||
; EG: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
|
||||
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
|
||||
; EG: 16
|
||||
define amdgpu_kernel void @constant_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
|
||||
%a = load i16, i16 addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
|
||||
%a = load i16, i16 addrspace(4)* %in
|
||||
%ext = sext i16 %a to i32
|
||||
store i32 %ext, i32 addrspace(1)* %out
|
||||
ret void
|
||||
@ -109,8 +109,8 @@ define amdgpu_kernel void @constant_sextload_i16_to_i32(i32 addrspace(1)* %out,
|
||||
; GCN-HSA: flat_load_ushort
|
||||
|
||||
; EG: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}, 0, #1
|
||||
define amdgpu_kernel void @constant_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 {
|
||||
%load = load <1 x i16>, <1 x i16> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
|
||||
%load = load <1 x i16>, <1 x i16> addrspace(4)* %in
|
||||
%ext = zext <1 x i16> %load to <1 x i32>
|
||||
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -123,8 +123,8 @@ define amdgpu_kernel void @constant_zextload_v1i16_to_v1i32(<1 x i32> addrspace(
|
||||
; EG: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
|
||||
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
|
||||
; EG: 16
|
||||
define amdgpu_kernel void @constant_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 {
|
||||
%load = load <1 x i16>, <1 x i16> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
|
||||
%load = load <1 x i16>, <1 x i16> addrspace(4)* %in
|
||||
%ext = sext <1 x i16> %load to <1 x i32>
|
||||
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -140,8 +140,8 @@ define amdgpu_kernel void @constant_sextload_v1i16_to_v1i32(<1 x i32> addrspace(
|
||||
; EG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], literal
|
||||
; EG: 16
|
||||
; EG: 16
|
||||
define amdgpu_kernel void @constant_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
|
||||
%load = load <2 x i16>, <2 x i16> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 {
|
||||
%load = load <2 x i16>, <2 x i16> addrspace(4)* %in
|
||||
%ext = zext <2 x i16> %load to <2 x i32>
|
||||
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -160,8 +160,8 @@ define amdgpu_kernel void @constant_zextload_v2i16_to_v2i32(<2 x i32> addrspace(
|
||||
; EG-DAG: BFE_INT {{[* ]*}}[[ST]].Y, {{PV\.[XYZW]}}, 0.0, literal
|
||||
; EG-DAG: 16
|
||||
; EG-DAG: 16
|
||||
define amdgpu_kernel void @constant_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
|
||||
%load = load <2 x i16>, <2 x i16> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 {
|
||||
%load = load <2 x i16>, <2 x i16> addrspace(4)* %in
|
||||
%ext = sext <2 x i16> %load to <2 x i32>
|
||||
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -183,9 +183,9 @@ define amdgpu_kernel void @constant_sextload_v2i16_to_v2i32(<2 x i32> addrspace(
|
||||
; EG-DAG: AND_INT {{[* ]*}}[[ST_HI]].X, {{T[0-9]\.[XYZW]}}, literal
|
||||
; EG-DAG: 65535
|
||||
; EG-DAG: 65535
|
||||
define amdgpu_kernel void @constant_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(2)* %in) {
|
||||
define amdgpu_kernel void @constant_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) {
|
||||
entry:
|
||||
%ld = load <3 x i16>, <3 x i16> addrspace(2)* %in
|
||||
%ld = load <3 x i16>, <3 x i16> addrspace(4)* %in
|
||||
%ext = zext <3 x i16> %ld to <3 x i32>
|
||||
store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -204,9 +204,9 @@ entry:
|
||||
; EG-DAG: BFE_INT {{[* ]*}}[[ST_HI]].X, {{T[0-9]\.[XYZW]}}, 0.0, literal
|
||||
; EG-DAG: 16
|
||||
; EG-DAG: 16
|
||||
define amdgpu_kernel void @constant_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(2)* %in) {
|
||||
define amdgpu_kernel void @constant_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) {
|
||||
entry:
|
||||
%ld = load <3 x i16>, <3 x i16> addrspace(2)* %in
|
||||
%ld = load <3 x i16>, <3 x i16> addrspace(4)* %in
|
||||
%ext = sext <3 x i16> %ld to <3 x i32>
|
||||
store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -229,8 +229,8 @@ entry:
|
||||
; EG-DAG: AND_INT {{[* ]*}}[[ST]].Z, {{T[0-9]\.[XYZW]}}, literal
|
||||
; EG-DAG: 65535
|
||||
; EG-DAG: 65535
|
||||
define amdgpu_kernel void @constant_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
|
||||
%load = load <4 x i16>, <4 x i16> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 {
|
||||
%load = load <4 x i16>, <4 x i16> addrspace(4)* %in
|
||||
%ext = zext <4 x i16> %load to <4 x i32>
|
||||
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -254,8 +254,8 @@ define amdgpu_kernel void @constant_zextload_v4i16_to_v4i32(<4 x i32> addrspace(
|
||||
; EG-DAG: 16
|
||||
; EG-DAG: 16
|
||||
; EG-DAG: 16
|
||||
define amdgpu_kernel void @constant_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
|
||||
%load = load <4 x i16>, <4 x i16> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 {
|
||||
%load = load <4 x i16>, <4 x i16> addrspace(4)* %in
|
||||
%ext = sext <4 x i16> %load to <4 x i32>
|
||||
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -288,8 +288,8 @@ define amdgpu_kernel void @constant_sextload_v4i16_to_v4i32(<4 x i32> addrspace(
|
||||
; EG-DAG: 65535
|
||||
; EG-DAG: 65535
|
||||
; EG-DAG: 65535
|
||||
define amdgpu_kernel void @constant_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 {
|
||||
%load = load <8 x i16>, <8 x i16> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 {
|
||||
%load = load <8 x i16>, <8 x i16> addrspace(4)* %in
|
||||
%ext = zext <8 x i16> %load to <8 x i32>
|
||||
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -322,8 +322,8 @@ define amdgpu_kernel void @constant_zextload_v8i16_to_v8i32(<8 x i32> addrspace(
|
||||
; EG-DAG: 16
|
||||
; EG-DAG: 16
|
||||
; EG-DAG: 16
|
||||
define amdgpu_kernel void @constant_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 {
|
||||
%load = load <8 x i16>, <8 x i16> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 {
|
||||
%load = load <8 x i16>, <8 x i16> addrspace(4)* %in
|
||||
%ext = sext <8 x i16> %load to <8 x i32>
|
||||
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -337,8 +337,8 @@ define amdgpu_kernel void @constant_sextload_v8i16_to_v8i32(<8 x i32> addrspace(
|
||||
; v16i16 is naturally 32 byte aligned
|
||||
; EG-DAG: VTX_READ_128 [[DST_HI:T[0-9]+\.XYZW]], {{T[0-9]+.[XYZW]}}, 0, #1
|
||||
; EG-DAG: VTX_READ_128 [[DST_LO:T[0-9]+\.XYZW]], {{T[0-9]+.[XYZW]}}, 16, #1
|
||||
define amdgpu_kernel void @constant_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 {
|
||||
%load = load <16 x i16>, <16 x i16> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 {
|
||||
%load = load <16 x i16>, <16 x i16> addrspace(4)* %in
|
||||
%ext = zext <16 x i16> %load to <16 x i32>
|
||||
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -352,8 +352,8 @@ define amdgpu_kernel void @constant_zextload_v16i16_to_v16i32(<16 x i32> addrspa
|
||||
; v16i16 is naturally 32 byte aligned
|
||||
; EG-DAG: VTX_READ_128 [[DST_HI:T[0-9]+\.XYZW]], {{T[0-9]+\.[XYZW]}}, 0, #1
|
||||
; EG-DAG: VTX_READ_128 [[DST_LO:T[0-9]+\.XYZW]], {{T[0-9]+\.[XYZW]}}, 16, #1
|
||||
define amdgpu_kernel void @constant_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 {
|
||||
%load = load <16 x i16>, <16 x i16> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 {
|
||||
%load = load <16 x i16>, <16 x i16> addrspace(4)* %in
|
||||
%ext = sext <16 x i16> %load to <16 x i32>
|
||||
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -369,8 +369,8 @@ define amdgpu_kernel void @constant_sextload_v16i16_to_v16i32(<16 x i32> addrspa
|
||||
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 16, #1
|
||||
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 32, #1
|
||||
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 48, #1
|
||||
define amdgpu_kernel void @constant_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 {
|
||||
%load = load <32 x i16>, <32 x i16> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 {
|
||||
%load = load <32 x i16>, <32 x i16> addrspace(4)* %in
|
||||
%ext = zext <32 x i16> %load to <32 x i32>
|
||||
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -385,8 +385,8 @@ define amdgpu_kernel void @constant_zextload_v32i16_to_v32i32(<32 x i32> addrspa
|
||||
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 16, #1
|
||||
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 32, #1
|
||||
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 48, #1
|
||||
define amdgpu_kernel void @constant_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 {
|
||||
%load = load <32 x i16>, <32 x i16> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 {
|
||||
%load = load <32 x i16>, <32 x i16> addrspace(4)* %in
|
||||
%ext = sext <32 x i16> %load to <32 x i32>
|
||||
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -404,8 +404,8 @@ define amdgpu_kernel void @constant_sextload_v32i16_to_v32i32(<32 x i32> addrspa
|
||||
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 80, #1
|
||||
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 96, #1
|
||||
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 112, #1
|
||||
define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 {
|
||||
%load = load <64 x i16>, <64 x i16> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 {
|
||||
%load = load <64 x i16>, <64 x i16> addrspace(4)* %in
|
||||
%ext = zext <64 x i16> %load to <64 x i32>
|
||||
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -421,8 +421,8 @@ define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(<64 x i32> addrspa
|
||||
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 80, #1
|
||||
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 96, #1
|
||||
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 112, #1
|
||||
define amdgpu_kernel void @constant_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 {
|
||||
%load = load <64 x i16>, <64 x i16> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 {
|
||||
%load = load <64 x i16>, <64 x i16> addrspace(4)* %in
|
||||
%ext = sext <64 x i16> %load to <64 x i32>
|
||||
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -438,8 +438,8 @@ define amdgpu_kernel void @constant_sextload_v64i16_to_v64i32(<64 x i32> addrspa
|
||||
|
||||
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
|
||||
; EG: MOV {{.*}}, 0.0
|
||||
define amdgpu_kernel void @constant_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
|
||||
%a = load i16, i16 addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
|
||||
%a = load i16, i16 addrspace(4)* %in
|
||||
%ext = zext i16 %a to i64
|
||||
store i64 %ext, i64 addrspace(1)* %out
|
||||
ret void
|
||||
@ -464,8 +464,8 @@ define amdgpu_kernel void @constant_zextload_i16_to_i64(i64 addrspace(1)* %out,
|
||||
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
|
||||
; TODO: These could be expanded earlier using ASHR 15
|
||||
; EG: 31
|
||||
define amdgpu_kernel void @constant_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
|
||||
%a = load i16, i16 addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
|
||||
%a = load i16, i16 addrspace(4)* %in
|
||||
%ext = sext i16 %a to i64
|
||||
store i64 %ext, i64 addrspace(1)* %out
|
||||
ret void
|
||||
@ -475,8 +475,8 @@ define amdgpu_kernel void @constant_sextload_i16_to_i64(i64 addrspace(1)* %out,
|
||||
|
||||
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
|
||||
; EG: MOV {{.*}}, 0.0
|
||||
define amdgpu_kernel void @constant_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 {
|
||||
%load = load <1 x i16>, <1 x i16> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
|
||||
%load = load <1 x i16>, <1 x i16> addrspace(4)* %in
|
||||
%ext = zext <1 x i16> %load to <1 x i64>
|
||||
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -488,8 +488,8 @@ define amdgpu_kernel void @constant_zextload_v1i16_to_v1i64(<1 x i64> addrspace(
|
||||
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
|
||||
; TODO: These could be expanded earlier using ASHR 15
|
||||
; EG: 31
|
||||
define amdgpu_kernel void @constant_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 {
|
||||
%load = load <1 x i16>, <1 x i16> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
|
||||
%load = load <1 x i16>, <1 x i16> addrspace(4)* %in
|
||||
%ext = sext <1 x i16> %load to <1 x i64>
|
||||
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -498,8 +498,8 @@ define amdgpu_kernel void @constant_sextload_v1i16_to_v1i64(<1 x i64> addrspace(
|
||||
; FUNC-LABEL: {{^}}constant_zextload_v2i16_to_v2i64:
|
||||
|
||||
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
|
||||
define amdgpu_kernel void @constant_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
|
||||
%load = load <2 x i16>, <2 x i16> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 {
|
||||
%load = load <2 x i16>, <2 x i16> addrspace(4)* %in
|
||||
%ext = zext <2 x i16> %load to <2 x i64>
|
||||
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -508,8 +508,8 @@ define amdgpu_kernel void @constant_zextload_v2i16_to_v2i64(<2 x i64> addrspace(
|
||||
; FUNC-LABEL: {{^}}constant_sextload_v2i16_to_v2i64:
|
||||
|
||||
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
|
||||
define amdgpu_kernel void @constant_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
|
||||
%load = load <2 x i16>, <2 x i16> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 {
|
||||
%load = load <2 x i16>, <2 x i16> addrspace(4)* %in
|
||||
%ext = sext <2 x i16> %load to <2 x i64>
|
||||
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -518,8 +518,8 @@ define amdgpu_kernel void @constant_sextload_v2i16_to_v2i64(<2 x i64> addrspace(
|
||||
; FUNC-LABEL: {{^}}constant_zextload_v4i16_to_v4i64:
|
||||
|
||||
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
|
||||
define amdgpu_kernel void @constant_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
|
||||
%load = load <4 x i16>, <4 x i16> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 {
|
||||
%load = load <4 x i16>, <4 x i16> addrspace(4)* %in
|
||||
%ext = zext <4 x i16> %load to <4 x i64>
|
||||
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -528,8 +528,8 @@ define amdgpu_kernel void @constant_zextload_v4i16_to_v4i64(<4 x i64> addrspace(
|
||||
; FUNC-LABEL: {{^}}constant_sextload_v4i16_to_v4i64:
|
||||
|
||||
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
|
||||
define amdgpu_kernel void @constant_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
|
||||
%load = load <4 x i16>, <4 x i16> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 {
|
||||
%load = load <4 x i16>, <4 x i16> addrspace(4)* %in
|
||||
%ext = sext <4 x i16> %load to <4 x i64>
|
||||
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -538,8 +538,8 @@ define amdgpu_kernel void @constant_sextload_v4i16_to_v4i64(<4 x i64> addrspace(
|
||||
; FUNC-LABEL: {{^}}constant_zextload_v8i16_to_v8i64:
|
||||
|
||||
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
|
||||
define amdgpu_kernel void @constant_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 {
|
||||
%load = load <8 x i16>, <8 x i16> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 {
|
||||
%load = load <8 x i16>, <8 x i16> addrspace(4)* %in
|
||||
%ext = zext <8 x i16> %load to <8 x i64>
|
||||
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -548,8 +548,8 @@ define amdgpu_kernel void @constant_zextload_v8i16_to_v8i64(<8 x i64> addrspace(
|
||||
; FUNC-LABEL: {{^}}constant_sextload_v8i16_to_v8i64:
|
||||
|
||||
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
|
||||
define amdgpu_kernel void @constant_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 {
|
||||
%load = load <8 x i16>, <8 x i16> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 {
|
||||
%load = load <8 x i16>, <8 x i16> addrspace(4)* %in
|
||||
%ext = sext <8 x i16> %load to <8 x i64>
|
||||
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -559,8 +559,8 @@ define amdgpu_kernel void @constant_sextload_v8i16_to_v8i64(<8 x i64> addrspace(
|
||||
|
||||
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
|
||||
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
|
||||
define amdgpu_kernel void @constant_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 {
|
||||
%load = load <16 x i16>, <16 x i16> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 {
|
||||
%load = load <16 x i16>, <16 x i16> addrspace(4)* %in
|
||||
%ext = zext <16 x i16> %load to <16 x i64>
|
||||
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -570,8 +570,8 @@ define amdgpu_kernel void @constant_zextload_v16i16_to_v16i64(<16 x i64> addrspa
|
||||
|
||||
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
|
||||
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
|
||||
define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 {
|
||||
%load = load <16 x i16>, <16 x i16> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 {
|
||||
%load = load <16 x i16>, <16 x i16> addrspace(4)* %in
|
||||
%ext = sext <16 x i16> %load to <16 x i64>
|
||||
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -583,8 +583,8 @@ define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(<16 x i64> addrspa
|
||||
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
|
||||
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 32, #1
|
||||
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 48, #1
|
||||
define amdgpu_kernel void @constant_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 {
|
||||
%load = load <32 x i16>, <32 x i16> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 {
|
||||
%load = load <32 x i16>, <32 x i16> addrspace(4)* %in
|
||||
%ext = zext <32 x i16> %load to <32 x i64>
|
||||
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -596,8 +596,8 @@ define amdgpu_kernel void @constant_zextload_v32i16_to_v32i64(<32 x i64> addrspa
|
||||
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
|
||||
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 32, #1
|
||||
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 48, #1
|
||||
define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 {
|
||||
%load = load <32 x i16>, <32 x i16> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 {
|
||||
%load = load <32 x i16>, <32 x i16> addrspace(4)* %in
|
||||
%ext = sext <32 x i16> %load to <32 x i64>
|
||||
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -606,16 +606,16 @@ define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(<32 x i64> addrspa
|
||||
; These trigger undefined register machine verifier errors
|
||||
|
||||
; ; XFUNC-LABEL: {{^}}constant_zextload_v64i16_to_v64i64:
|
||||
; define amdgpu_kernel void @constant_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 {
|
||||
; %load = load <64 x i16>, <64 x i16> addrspace(2)* %in
|
||||
; define amdgpu_kernel void @constant_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 {
|
||||
; %load = load <64 x i16>, <64 x i16> addrspace(4)* %in
|
||||
; %ext = zext <64 x i16> %load to <64 x i64>
|
||||
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
|
||||
; ret void
|
||||
; }
|
||||
|
||||
; ; XFUNC-LABEL: {{^}}constant_sextload_v64i16_to_v64i64:
|
||||
; define amdgpu_kernel void @constant_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 {
|
||||
; %load = load <64 x i16>, <64 x i16> addrspace(2)* %in
|
||||
; define amdgpu_kernel void @constant_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 {
|
||||
; %load = load <64 x i16>, <64 x i16> addrspace(4)* %in
|
||||
; %ext = sext <64 x i16> %load to <64 x i64>
|
||||
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
|
||||
; ret void
|
||||
|
@ -7,9 +7,9 @@
|
||||
; GCN: s_load_dword s{{[0-9]+}}
|
||||
|
||||
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
|
||||
define amdgpu_kernel void @constant_load_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) #0 {
|
||||
define amdgpu_kernel void @constant_load_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) #0 {
|
||||
entry:
|
||||
%ld = load i32, i32 addrspace(2)* %in
|
||||
%ld = load i32, i32 addrspace(4)* %in
|
||||
store i32 %ld, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -18,9 +18,9 @@ entry:
|
||||
; GCN: s_load_dwordx2
|
||||
|
||||
; EG: VTX_READ_64
|
||||
define amdgpu_kernel void @constant_load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(2)* %in) #0 {
|
||||
define amdgpu_kernel void @constant_load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(4)* %in) #0 {
|
||||
entry:
|
||||
%ld = load <2 x i32>, <2 x i32> addrspace(2)* %in
|
||||
%ld = load <2 x i32>, <2 x i32> addrspace(4)* %in
|
||||
store <2 x i32> %ld, <2 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -29,9 +29,9 @@ entry:
|
||||
; GCN: s_load_dwordx4
|
||||
|
||||
; EG: VTX_READ_128
|
||||
define amdgpu_kernel void @constant_load_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(2)* %in) #0 {
|
||||
define amdgpu_kernel void @constant_load_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(4)* %in) #0 {
|
||||
entry:
|
||||
%ld = load <3 x i32>, <3 x i32> addrspace(2)* %in
|
||||
%ld = load <3 x i32>, <3 x i32> addrspace(4)* %in
|
||||
store <3 x i32> %ld, <3 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -40,9 +40,9 @@ entry:
|
||||
; GCN: s_load_dwordx4
|
||||
|
||||
; EG: VTX_READ_128
|
||||
define amdgpu_kernel void @constant_load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(2)* %in) #0 {
|
||||
define amdgpu_kernel void @constant_load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(4)* %in) #0 {
|
||||
entry:
|
||||
%ld = load <4 x i32>, <4 x i32> addrspace(2)* %in
|
||||
%ld = load <4 x i32>, <4 x i32> addrspace(4)* %in
|
||||
store <4 x i32> %ld, <4 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -52,9 +52,9 @@ entry:
|
||||
|
||||
; EG: VTX_READ_128
|
||||
; EG: VTX_READ_128
|
||||
define amdgpu_kernel void @constant_load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(2)* %in) #0 {
|
||||
define amdgpu_kernel void @constant_load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(4)* %in) #0 {
|
||||
entry:
|
||||
%ld = load <8 x i32>, <8 x i32> addrspace(2)* %in
|
||||
%ld = load <8 x i32>, <8 x i32> addrspace(4)* %in
|
||||
store <8 x i32> %ld, <8 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -66,9 +66,9 @@ entry:
|
||||
; EG: VTX_READ_128
|
||||
; EG: VTX_READ_128
|
||||
; EG: VTX_READ_128
|
||||
define amdgpu_kernel void @constant_load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(2)* %in) #0 {
|
||||
define amdgpu_kernel void @constant_load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(4)* %in) #0 {
|
||||
entry:
|
||||
%ld = load <16 x i32>, <16 x i32> addrspace(2)* %in
|
||||
%ld = load <16 x i32>, <16 x i32> addrspace(4)* %in
|
||||
store <16 x i32> %ld, <16 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -81,8 +81,8 @@ entry:
|
||||
; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
|
||||
; EG: CF_END
|
||||
; EG: VTX_READ_32
|
||||
define amdgpu_kernel void @constant_zextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(2)* %in) #0 {
|
||||
%ld = load i32, i32 addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(4)* %in) #0 {
|
||||
%ld = load i32, i32 addrspace(4)* %in
|
||||
%ext = zext i32 %ld to i64
|
||||
store i64 %ext, i64 addrspace(1)* %out
|
||||
ret void
|
||||
@ -98,8 +98,8 @@ define amdgpu_kernel void @constant_zextload_i32_to_i64(i64 addrspace(1)* %out,
|
||||
; EG: VTX_READ_32
|
||||
; EG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.
|
||||
; EG: 31
|
||||
define amdgpu_kernel void @constant_sextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(2)* %in) #0 {
|
||||
%ld = load i32, i32 addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(4)* %in) #0 {
|
||||
%ld = load i32, i32 addrspace(4)* %in
|
||||
%ext = sext i32 %ld to i64
|
||||
store i64 %ext, i64 addrspace(1)* %out
|
||||
ret void
|
||||
@ -108,8 +108,8 @@ define amdgpu_kernel void @constant_sextload_i32_to_i64(i64 addrspace(1)* %out,
|
||||
; FUNC-LABEL: {{^}}constant_zextload_v1i32_to_v1i64:
|
||||
; GCN: s_load_dword
|
||||
; GCN: store_dwordx2
|
||||
define amdgpu_kernel void @constant_zextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(2)* %in) #0 {
|
||||
%ld = load <1 x i32>, <1 x i32> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(4)* %in) #0 {
|
||||
%ld = load <1 x i32>, <1 x i32> addrspace(4)* %in
|
||||
%ext = zext <1 x i32> %ld to <1 x i64>
|
||||
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -119,8 +119,8 @@ define amdgpu_kernel void @constant_zextload_v1i32_to_v1i64(<1 x i64> addrspace(
|
||||
; GCN: s_load_dword s[[LO:[0-9]+]]
|
||||
; GCN: s_ashr_i32 s[[HI:[0-9]+]], s[[LO]], 31
|
||||
; GCN: store_dwordx2
|
||||
define amdgpu_kernel void @constant_sextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(2)* %in) #0 {
|
||||
%ld = load <1 x i32>, <1 x i32> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(4)* %in) #0 {
|
||||
%ld = load <1 x i32>, <1 x i32> addrspace(4)* %in
|
||||
%ext = sext <1 x i32> %ld to <1 x i64>
|
||||
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -129,8 +129,8 @@ define amdgpu_kernel void @constant_sextload_v1i32_to_v1i64(<1 x i64> addrspace(
|
||||
; FUNC-LABEL: {{^}}constant_zextload_v2i32_to_v2i64:
|
||||
; GCN: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
|
||||
; GCN: store_dwordx4
|
||||
define amdgpu_kernel void @constant_zextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(2)* %in) #0 {
|
||||
%ld = load <2 x i32>, <2 x i32> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(4)* %in) #0 {
|
||||
%ld = load <2 x i32>, <2 x i32> addrspace(4)* %in
|
||||
%ext = zext <2 x i32> %ld to <2 x i64>
|
||||
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -143,8 +143,8 @@ define amdgpu_kernel void @constant_zextload_v2i32_to_v2i64(<2 x i64> addrspace(
|
||||
; GCN-DAG: s_ashr_i32
|
||||
|
||||
; GCN: store_dwordx4
|
||||
define amdgpu_kernel void @constant_sextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(2)* %in) #0 {
|
||||
%ld = load <2 x i32>, <2 x i32> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(4)* %in) #0 {
|
||||
%ld = load <2 x i32>, <2 x i32> addrspace(4)* %in
|
||||
%ext = sext <2 x i32> %ld to <2 x i64>
|
||||
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -155,8 +155,8 @@ define amdgpu_kernel void @constant_sextload_v2i32_to_v2i64(<2 x i64> addrspace(
|
||||
|
||||
; GCN: store_dwordx4
|
||||
; GCN: store_dwordx4
|
||||
define amdgpu_kernel void @constant_zextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(2)* %in) #0 {
|
||||
%ld = load <4 x i32>, <4 x i32> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(4)* %in) #0 {
|
||||
%ld = load <4 x i32>, <4 x i32> addrspace(4)* %in
|
||||
%ext = zext <4 x i32> %ld to <4 x i64>
|
||||
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -172,8 +172,8 @@ define amdgpu_kernel void @constant_zextload_v4i32_to_v4i64(<4 x i64> addrspace(
|
||||
|
||||
; GCN: store_dwordx4
|
||||
; GCN: store_dwordx4
|
||||
define amdgpu_kernel void @constant_sextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(2)* %in) #0 {
|
||||
%ld = load <4 x i32>, <4 x i32> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(4)* %in) #0 {
|
||||
%ld = load <4 x i32>, <4 x i32> addrspace(4)* %in
|
||||
%ext = sext <4 x i32> %ld to <4 x i64>
|
||||
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -191,8 +191,8 @@ define amdgpu_kernel void @constant_sextload_v4i32_to_v4i64(<4 x i64> addrspace(
|
||||
; GCN-HSA-DAG: flat_store_dwordx4
|
||||
; GCN-SA-DAG: flat_store_dwordx4
|
||||
; GCN-HSA-DAG: flat_store_dwordx4
|
||||
define amdgpu_kernel void @constant_zextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(2)* %in) #0 {
|
||||
%ld = load <8 x i32>, <8 x i32> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(4)* %in) #0 {
|
||||
%ld = load <8 x i32>, <8 x i32> addrspace(4)* %in
|
||||
%ext = zext <8 x i32> %ld to <8 x i64>
|
||||
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -219,8 +219,8 @@ define amdgpu_kernel void @constant_zextload_v8i32_to_v8i64(<8 x i64> addrspace(
|
||||
; GCN-HSA-DAG: flat_store_dwordx4
|
||||
; GCN-HSA-DAG: flat_store_dwordx4
|
||||
; GCN-HSA-DAG: flat_store_dwordx4
|
||||
define amdgpu_kernel void @constant_sextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(2)* %in) #0 {
|
||||
%ld = load <8 x i32>, <8 x i32> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(4)* %in) #0 {
|
||||
%ld = load <8 x i32>, <8 x i32> addrspace(4)* %in
|
||||
%ext = sext <8 x i32> %ld to <8 x i64>
|
||||
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -240,8 +240,8 @@ define amdgpu_kernel void @constant_sextload_v8i32_to_v8i64(<8 x i64> addrspace(
|
||||
; GCN: store_dwordx4
|
||||
; GCN: store_dwordx4
|
||||
; GCN: store_dwordx4
|
||||
define amdgpu_kernel void @constant_sextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(2)* %in) #0 {
|
||||
%ld = load <16 x i32>, <16 x i32> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(4)* %in) #0 {
|
||||
%ld = load <16 x i32>, <16 x i32> addrspace(4)* %in
|
||||
%ext = sext <16 x i32> %ld to <16 x i64>
|
||||
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -267,8 +267,8 @@ define amdgpu_kernel void @constant_sextload_v16i32_to_v16i64(<16 x i64> addrspa
|
||||
; GCN-HSA: flat_store_dwordx4
|
||||
; GCN-HSA: flat_store_dwordx4
|
||||
; GCN-HSA: flat_store_dwordx4
|
||||
define amdgpu_kernel void @constant_zextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(2)* %in) #0 {
|
||||
%ld = load <16 x i32>, <16 x i32> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(4)* %in) #0 {
|
||||
%ld = load <16 x i32>, <16 x i32> addrspace(4)* %in
|
||||
%ext = zext <16 x i32> %ld to <16 x i64>
|
||||
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -319,8 +319,8 @@ define amdgpu_kernel void @constant_zextload_v16i32_to_v16i64(<16 x i64> addrspa
|
||||
; GCN-HSA-DAG: flat_store_dwordx4
|
||||
; GCN-HSA-DAG: flat_store_dwordx4
|
||||
|
||||
define amdgpu_kernel void @constant_sextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(2)* %in) #0 {
|
||||
%ld = load <32 x i32>, <32 x i32> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(4)* %in) #0 {
|
||||
%ld = load <32 x i32>, <32 x i32> addrspace(4)* %in
|
||||
%ext = sext <32 x i32> %ld to <32 x i64>
|
||||
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -370,8 +370,8 @@ define amdgpu_kernel void @constant_sextload_v32i32_to_v32i64(<32 x i64> addrspa
|
||||
; GCN-HSA-DAG: flat_store_dwordx4
|
||||
; GCN-HSA-DAG: flat_store_dwordx4
|
||||
; GCN-HSA-DAG: flat_store_dwordx4
|
||||
define amdgpu_kernel void @constant_zextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(2)* %in) #0 {
|
||||
%ld = load <32 x i32>, <32 x i32> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(4)* %in) #0 {
|
||||
%ld = load <32 x i32>, <32 x i32> addrspace(4)* %in
|
||||
%ext = zext <32 x i32> %ld to <32 x i64>
|
||||
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
|
@ -7,8 +7,8 @@
|
||||
; FUNC-LABEL: {{^}}constant_load_i64:
|
||||
; GCN: s_load_dwordx2 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
|
||||
; EG: VTX_READ_64
|
||||
define amdgpu_kernel void @constant_load_i64(i64 addrspace(1)* %out, i64 addrspace(2)* %in) #0 {
|
||||
%ld = load i64, i64 addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_load_i64(i64 addrspace(1)* %out, i64 addrspace(4)* %in) #0 {
|
||||
%ld = load i64, i64 addrspace(4)* %in
|
||||
store i64 %ld, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -17,9 +17,9 @@ define amdgpu_kernel void @constant_load_i64(i64 addrspace(1)* %out, i64 addrspa
|
||||
; GCN: s_load_dwordx4
|
||||
|
||||
; EG: VTX_READ_128
|
||||
define amdgpu_kernel void @constant_load_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(2)* %in) #0 {
|
||||
define amdgpu_kernel void @constant_load_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(4)* %in) #0 {
|
||||
entry:
|
||||
%ld = load <2 x i64>, <2 x i64> addrspace(2)* %in
|
||||
%ld = load <2 x i64>, <2 x i64> addrspace(4)* %in
|
||||
store <2 x i64> %ld, <2 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -29,9 +29,9 @@ entry:
|
||||
|
||||
; EG-DAG: VTX_READ_128
|
||||
; EG-DAG: VTX_READ_128
|
||||
define amdgpu_kernel void @constant_load_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> addrspace(2)* %in) #0 {
|
||||
define amdgpu_kernel void @constant_load_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> addrspace(4)* %in) #0 {
|
||||
entry:
|
||||
%ld = load <3 x i64>, <3 x i64> addrspace(2)* %in
|
||||
%ld = load <3 x i64>, <3 x i64> addrspace(4)* %in
|
||||
store <3 x i64> %ld, <3 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -41,9 +41,9 @@ entry:
|
||||
|
||||
; EG: VTX_READ_128
|
||||
; EG: VTX_READ_128
|
||||
define amdgpu_kernel void @constant_load_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(2)* %in) #0 {
|
||||
define amdgpu_kernel void @constant_load_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(4)* %in) #0 {
|
||||
entry:
|
||||
%ld = load <4 x i64>, <4 x i64> addrspace(2)* %in
|
||||
%ld = load <4 x i64>, <4 x i64> addrspace(4)* %in
|
||||
store <4 x i64> %ld, <4 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -55,9 +55,9 @@ entry:
|
||||
; EG: VTX_READ_128
|
||||
; EG: VTX_READ_128
|
||||
; EG: VTX_READ_128
|
||||
define amdgpu_kernel void @constant_load_v8i64(<8 x i64> addrspace(1)* %out, <8 x i64> addrspace(2)* %in) #0 {
|
||||
define amdgpu_kernel void @constant_load_v8i64(<8 x i64> addrspace(1)* %out, <8 x i64> addrspace(4)* %in) #0 {
|
||||
entry:
|
||||
%ld = load <8 x i64>, <8 x i64> addrspace(2)* %in
|
||||
%ld = load <8 x i64>, <8 x i64> addrspace(4)* %in
|
||||
store <8 x i64> %ld, <8 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -74,9 +74,9 @@ entry:
|
||||
; EG: VTX_READ_128
|
||||
; EG: VTX_READ_128
|
||||
; EG: VTX_READ_128
|
||||
define amdgpu_kernel void @constant_load_v16i64(<16 x i64> addrspace(1)* %out, <16 x i64> addrspace(2)* %in) #0 {
|
||||
define amdgpu_kernel void @constant_load_v16i64(<16 x i64> addrspace(1)* %out, <16 x i64> addrspace(4)* %in) #0 {
|
||||
entry:
|
||||
%ld = load <16 x i64>, <16 x i64> addrspace(2)* %in
|
||||
%ld = load <16 x i64>, <16 x i64> addrspace(4)* %in
|
||||
store <16 x i64> %ld, <16 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
@ -10,9 +10,9 @@
|
||||
|
||||
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
|
||||
; TODO: NOT AND
|
||||
define amdgpu_kernel void @constant_load_i8(i8 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
|
||||
define amdgpu_kernel void @constant_load_i8(i8 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
|
||||
entry:
|
||||
%ld = load i8, i8 addrspace(2)* %in
|
||||
%ld = load i8, i8 addrspace(4)* %in
|
||||
store i8 %ld, i8 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -22,9 +22,9 @@ entry:
|
||||
; GCN-HSA: flat_load_ushort v
|
||||
|
||||
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
|
||||
define amdgpu_kernel void @constant_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
|
||||
define amdgpu_kernel void @constant_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
|
||||
entry:
|
||||
%ld = load <2 x i8>, <2 x i8> addrspace(2)* %in
|
||||
%ld = load <2 x i8>, <2 x i8> addrspace(4)* %in
|
||||
store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -33,9 +33,9 @@ entry:
|
||||
; GCN: s_load_dword s
|
||||
|
||||
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
|
||||
define amdgpu_kernel void @constant_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 {
|
||||
define amdgpu_kernel void @constant_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
|
||||
entry:
|
||||
%ld = load <3 x i8>, <3 x i8> addrspace(2)* %in
|
||||
%ld = load <3 x i8>, <3 x i8> addrspace(4)* %in
|
||||
store <3 x i8> %ld, <3 x i8> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -44,9 +44,9 @@ entry:
|
||||
; GCN: s_load_dword s
|
||||
|
||||
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
|
||||
define amdgpu_kernel void @constant_load_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
|
||||
define amdgpu_kernel void @constant_load_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
|
||||
entry:
|
||||
%ld = load <4 x i8>, <4 x i8> addrspace(2)* %in
|
||||
%ld = load <4 x i8>, <4 x i8> addrspace(4)* %in
|
||||
store <4 x i8> %ld, <4 x i8> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -55,9 +55,9 @@ entry:
|
||||
; GCN: s_load_dwordx2
|
||||
|
||||
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
|
||||
define amdgpu_kernel void @constant_load_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
|
||||
define amdgpu_kernel void @constant_load_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
|
||||
entry:
|
||||
%ld = load <8 x i8>, <8 x i8> addrspace(2)* %in
|
||||
%ld = load <8 x i8>, <8 x i8> addrspace(4)* %in
|
||||
store <8 x i8> %ld, <8 x i8> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -66,9 +66,9 @@ entry:
|
||||
; GCN: s_load_dwordx4
|
||||
|
||||
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
|
||||
define amdgpu_kernel void @constant_load_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
|
||||
define amdgpu_kernel void @constant_load_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
|
||||
entry:
|
||||
%ld = load <16 x i8>, <16 x i8> addrspace(2)* %in
|
||||
%ld = load <16 x i8>, <16 x i8> addrspace(4)* %in
|
||||
store <16 x i8> %ld, <16 x i8> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -78,8 +78,8 @@ entry:
|
||||
; GCN-HSA: flat_load_ubyte
|
||||
|
||||
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
|
||||
define amdgpu_kernel void @constant_zextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
|
||||
%a = load i8, i8 addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
|
||||
%a = load i8, i8 addrspace(4)* %in
|
||||
%ext = zext i8 %a to i32
|
||||
store i32 %ext, i32 addrspace(1)* %out
|
||||
ret void
|
||||
@ -92,8 +92,8 @@ define amdgpu_kernel void @constant_zextload_i8_to_i32(i32 addrspace(1)* %out, i
|
||||
; EG: VTX_READ_8 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
|
||||
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
|
||||
; EG: 8
|
||||
define amdgpu_kernel void @constant_sextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
|
||||
%ld = load i8, i8 addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
|
||||
%ld = load i8, i8 addrspace(4)* %in
|
||||
%ext = sext i8 %ld to i32
|
||||
store i32 %ext, i32 addrspace(1)* %out
|
||||
ret void
|
||||
@ -102,8 +102,8 @@ define amdgpu_kernel void @constant_sextload_i8_to_i32(i32 addrspace(1)* %out, i
|
||||
; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i32:
|
||||
|
||||
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
|
||||
define amdgpu_kernel void @constant_zextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <1 x i8>, <1 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <1 x i8>, <1 x i8> addrspace(4)* %in
|
||||
%ext = zext <1 x i8> %load to <1 x i32>
|
||||
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -114,8 +114,8 @@ define amdgpu_kernel void @constant_zextload_v1i8_to_v1i32(<1 x i32> addrspace(1
|
||||
; EG: VTX_READ_8 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
|
||||
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
|
||||
; EG: 8
|
||||
define amdgpu_kernel void @constant_sextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <1 x i8>, <1 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <1 x i8>, <1 x i8> addrspace(4)* %in
|
||||
%ext = sext <1 x i8> %load to <1 x i32>
|
||||
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -129,8 +129,8 @@ define amdgpu_kernel void @constant_sextload_v1i8_to_v1i32(<1 x i32> addrspace(1
|
||||
; TODO: This should use DST, but for some there are redundant MOVs
|
||||
; EG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
|
||||
; EG: 8
|
||||
define amdgpu_kernel void @constant_zextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <2 x i8>, <2 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <2 x i8>, <2 x i8> addrspace(4)* %in
|
||||
%ext = zext <2 x i8> %load to <2 x i32>
|
||||
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -150,8 +150,8 @@ define amdgpu_kernel void @constant_zextload_v2i8_to_v2i32(<2 x i32> addrspace(1
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: 8
|
||||
; EG-DAG: 8
|
||||
define amdgpu_kernel void @constant_sextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <2 x i8>, <2 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <2 x i8>, <2 x i8> addrspace(4)* %in
|
||||
%ext = sext <2 x i8> %load to <2 x i32>
|
||||
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -170,9 +170,9 @@ define amdgpu_kernel void @constant_sextload_v2i8_to_v2i32(<2 x i32> addrspace(1
|
||||
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
|
||||
; EG-DAG: 8
|
||||
; EG-DAG: 8
|
||||
define amdgpu_kernel void @constant_zextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 {
|
||||
define amdgpu_kernel void @constant_zextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
|
||||
entry:
|
||||
%ld = load <3 x i8>, <3 x i8> addrspace(2)* %in
|
||||
%ld = load <3 x i8>, <3 x i8> addrspace(4)* %in
|
||||
%ext = zext <3 x i8> %ld to <3 x i32>
|
||||
store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -193,9 +193,9 @@ entry:
|
||||
; EG-DAG: 8
|
||||
; EG-DAG: 8
|
||||
; EG-DAG: 8
|
||||
define amdgpu_kernel void @constant_sextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 {
|
||||
define amdgpu_kernel void @constant_sextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
|
||||
entry:
|
||||
%ld = load <3 x i8>, <3 x i8> addrspace(2)* %in
|
||||
%ld = load <3 x i8>, <3 x i8> addrspace(4)* %in
|
||||
%ext = sext <3 x i8> %ld to <3 x i32>
|
||||
store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -214,8 +214,8 @@ entry:
|
||||
; EG-DAG: 8
|
||||
; EG-DAG: 8
|
||||
; EG-DAG: 8
|
||||
define amdgpu_kernel void @constant_zextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <4 x i8>, <4 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <4 x i8>, <4 x i8> addrspace(4)* %in
|
||||
%ext = zext <4 x i8> %load to <4 x i32>
|
||||
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -236,8 +236,8 @@ define amdgpu_kernel void @constant_zextload_v4i8_to_v4i32(<4 x i32> addrspace(1
|
||||
; EG-DAG: 8
|
||||
; EG-DAG: 8
|
||||
; EG-DAG: 8
|
||||
define amdgpu_kernel void @constant_sextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <4 x i8>, <4 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <4 x i8>, <4 x i8> addrspace(4)* %in
|
||||
%ext = sext <4 x i8> %load to <4 x i32>
|
||||
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -264,8 +264,8 @@ define amdgpu_kernel void @constant_sextload_v4i8_to_v4i32(<4 x i32> addrspace(1
|
||||
; EG-DAG: 8
|
||||
; EG-DAG: 8
|
||||
; EG-DAG: 8
|
||||
define amdgpu_kernel void @constant_zextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <8 x i8>, <8 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <8 x i8>, <8 x i8> addrspace(4)* %in
|
||||
%ext = zext <8 x i8> %load to <8 x i32>
|
||||
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -294,8 +294,8 @@ define amdgpu_kernel void @constant_zextload_v8i8_to_v8i32(<8 x i32> addrspace(1
|
||||
; EG-DAG: 8
|
||||
; EG-DAG: 8
|
||||
; EG-DAG: 8
|
||||
define amdgpu_kernel void @constant_sextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <8 x i8>, <8 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <8 x i8>, <8 x i8> addrspace(4)* %in
|
||||
%ext = sext <8 x i8> %load to <8 x i32>
|
||||
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -335,8 +335,8 @@ define amdgpu_kernel void @constant_sextload_v8i8_to_v8i32(<8 x i32> addrspace(1
|
||||
; EG-DAG: 8
|
||||
; EG-DAG: 8
|
||||
; EG-DAG: 8
|
||||
define amdgpu_kernel void @constant_zextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <16 x i8>, <16 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <16 x i8>, <16 x i8> addrspace(4)* %in
|
||||
%ext = zext <16 x i8> %load to <16 x i32>
|
||||
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -378,8 +378,8 @@ define amdgpu_kernel void @constant_zextload_v16i8_to_v16i32(<16 x i32> addrspac
|
||||
; EG-DAG: 8
|
||||
; EG-DAG: 8
|
||||
; EG-DAG: 8
|
||||
define amdgpu_kernel void @constant_sextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <16 x i8>, <16 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <16 x i8>, <16 x i8> addrspace(4)* %in
|
||||
%ext = sext <16 x i8> %load to <16 x i32>
|
||||
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -450,8 +450,8 @@ define amdgpu_kernel void @constant_sextload_v16i8_to_v16i32(<16 x i32> addrspac
|
||||
; EG-DAG: 8
|
||||
; EG-DAG: 8
|
||||
; EG-DAG: 8
|
||||
define amdgpu_kernel void @constant_zextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <32 x i8>, <32 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <32 x i8>, <32 x i8> addrspace(4)* %in
|
||||
%ext = zext <32 x i8> %load to <32 x i32>
|
||||
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -526,8 +526,8 @@ define amdgpu_kernel void @constant_zextload_v32i8_to_v32i32(<32 x i32> addrspac
|
||||
; EG-DAG: 8
|
||||
; EG-DAG: 8
|
||||
; EG-DAG: 8
|
||||
define amdgpu_kernel void @constant_sextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <32 x i8>, <32 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <32 x i8>, <32 x i8> addrspace(4)* %in
|
||||
%ext = sext <32 x i8> %load to <32 x i32>
|
||||
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -539,8 +539,8 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i32(<32 x i32> addrspac
|
||||
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 16, #1
|
||||
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 32, #1
|
||||
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 48, #1
|
||||
define amdgpu_kernel void @constant_zextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <64 x i8>, <64 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <64 x i8>, <64 x i8> addrspace(4)* %in
|
||||
%ext = zext <64 x i8> %load to <64 x i32>
|
||||
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -552,8 +552,8 @@ define amdgpu_kernel void @constant_zextload_v64i8_to_v64i32(<64 x i32> addrspac
|
||||
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 16, #1
|
||||
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 32, #1
|
||||
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 48, #1
|
||||
define amdgpu_kernel void @constant_sextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <64 x i8>, <64 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <64 x i8>, <64 x i8> addrspace(4)* %in
|
||||
%ext = sext <64 x i8> %load to <64 x i32>
|
||||
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -570,8 +570,8 @@ define amdgpu_kernel void @constant_sextload_v64i8_to_v64i32(<64 x i32> addrspac
|
||||
|
||||
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
|
||||
; EG: MOV {{.*}}, 0.0
|
||||
define amdgpu_kernel void @constant_zextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
|
||||
%a = load i8, i8 addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
|
||||
%a = load i8, i8 addrspace(4)* %in
|
||||
%ext = zext i8 %a to i64
|
||||
store i64 %ext, i64 addrspace(1)* %out
|
||||
ret void
|
||||
@ -589,8 +589,8 @@ define amdgpu_kernel void @constant_zextload_i8_to_i64(i64 addrspace(1)* %out, i
|
||||
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
|
||||
; TODO: Why not 7 ?
|
||||
; EG: 31
|
||||
define amdgpu_kernel void @constant_sextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
|
||||
%a = load i8, i8 addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
|
||||
%a = load i8, i8 addrspace(4)* %in
|
||||
%ext = sext i8 %a to i64
|
||||
store i64 %ext, i64 addrspace(1)* %out
|
||||
ret void
|
||||
@ -600,8 +600,8 @@ define amdgpu_kernel void @constant_sextload_i8_to_i64(i64 addrspace(1)* %out, i
|
||||
|
||||
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
|
||||
; EG: MOV {{.*}}, 0.0
|
||||
define amdgpu_kernel void @constant_zextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <1 x i8>, <1 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <1 x i8>, <1 x i8> addrspace(4)* %in
|
||||
%ext = zext <1 x i8> %load to <1 x i64>
|
||||
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -613,8 +613,8 @@ define amdgpu_kernel void @constant_zextload_v1i8_to_v1i64(<1 x i64> addrspace(1
|
||||
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
|
||||
; TODO: Why not 7 ?
|
||||
; EG: 31
|
||||
define amdgpu_kernel void @constant_sextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <1 x i8>, <1 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <1 x i8>, <1 x i8> addrspace(4)* %in
|
||||
%ext = sext <1 x i8> %load to <1 x i64>
|
||||
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -623,8 +623,8 @@ define amdgpu_kernel void @constant_sextload_v1i8_to_v1i64(<1 x i64> addrspace(1
|
||||
; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i64:
|
||||
|
||||
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
|
||||
define amdgpu_kernel void @constant_zextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <2 x i8>, <2 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <2 x i8>, <2 x i8> addrspace(4)* %in
|
||||
%ext = zext <2 x i8> %load to <2 x i64>
|
||||
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -633,8 +633,8 @@ define amdgpu_kernel void @constant_zextload_v2i8_to_v2i64(<2 x i64> addrspace(1
|
||||
; FUNC-LABEL: {{^}}constant_sextload_v2i8_to_v2i64:
|
||||
|
||||
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
|
||||
define amdgpu_kernel void @constant_sextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <2 x i8>, <2 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <2 x i8>, <2 x i8> addrspace(4)* %in
|
||||
%ext = sext <2 x i8> %load to <2 x i64>
|
||||
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -643,8 +643,8 @@ define amdgpu_kernel void @constant_sextload_v2i8_to_v2i64(<2 x i64> addrspace(1
|
||||
; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i64:
|
||||
|
||||
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
|
||||
define amdgpu_kernel void @constant_zextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <4 x i8>, <4 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <4 x i8>, <4 x i8> addrspace(4)* %in
|
||||
%ext = zext <4 x i8> %load to <4 x i64>
|
||||
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -653,8 +653,8 @@ define amdgpu_kernel void @constant_zextload_v4i8_to_v4i64(<4 x i64> addrspace(1
|
||||
; FUNC-LABEL: {{^}}constant_sextload_v4i8_to_v4i64:
|
||||
|
||||
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
|
||||
define amdgpu_kernel void @constant_sextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <4 x i8>, <4 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <4 x i8>, <4 x i8> addrspace(4)* %in
|
||||
%ext = sext <4 x i8> %load to <4 x i64>
|
||||
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -663,8 +663,8 @@ define amdgpu_kernel void @constant_sextload_v4i8_to_v4i64(<4 x i64> addrspace(1
|
||||
; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i64:
|
||||
|
||||
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
|
||||
define amdgpu_kernel void @constant_zextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <8 x i8>, <8 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <8 x i8>, <8 x i8> addrspace(4)* %in
|
||||
%ext = zext <8 x i8> %load to <8 x i64>
|
||||
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -673,8 +673,8 @@ define amdgpu_kernel void @constant_zextload_v8i8_to_v8i64(<8 x i64> addrspace(1
|
||||
; FUNC-LABEL: {{^}}constant_sextload_v8i8_to_v8i64:
|
||||
|
||||
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
|
||||
define amdgpu_kernel void @constant_sextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <8 x i8>, <8 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <8 x i8>, <8 x i8> addrspace(4)* %in
|
||||
%ext = sext <8 x i8> %load to <8 x i64>
|
||||
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -683,8 +683,8 @@ define amdgpu_kernel void @constant_sextload_v8i8_to_v8i64(<8 x i64> addrspace(1
|
||||
; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i64:
|
||||
|
||||
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
|
||||
define amdgpu_kernel void @constant_zextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <16 x i8>, <16 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <16 x i8>, <16 x i8> addrspace(4)* %in
|
||||
%ext = zext <16 x i8> %load to <16 x i64>
|
||||
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -693,8 +693,8 @@ define amdgpu_kernel void @constant_zextload_v16i8_to_v16i64(<16 x i64> addrspac
|
||||
; FUNC-LABEL: {{^}}constant_sextload_v16i8_to_v16i64:
|
||||
|
||||
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
|
||||
define amdgpu_kernel void @constant_sextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <16 x i8>, <16 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <16 x i8>, <16 x i8> addrspace(4)* %in
|
||||
%ext = sext <16 x i8> %load to <16 x i64>
|
||||
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -704,8 +704,8 @@ define amdgpu_kernel void @constant_sextload_v16i8_to_v16i64(<16 x i64> addrspac
|
||||
|
||||
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
|
||||
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
|
||||
define amdgpu_kernel void @constant_zextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <32 x i8>, <32 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <32 x i8>, <32 x i8> addrspace(4)* %in
|
||||
%ext = zext <32 x i8> %load to <32 x i64>
|
||||
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
@ -715,24 +715,24 @@ define amdgpu_kernel void @constant_zextload_v32i8_to_v32i64(<32 x i64> addrspac
|
||||
|
||||
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
|
||||
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
|
||||
define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <32 x i8>, <32 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <32 x i8>, <32 x i8> addrspace(4)* %in
|
||||
%ext = sext <32 x i8> %load to <32 x i64>
|
||||
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; XFUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i64:
|
||||
; define amdgpu_kernel void @constant_zextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
|
||||
; %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
|
||||
; define amdgpu_kernel void @constant_zextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
|
||||
; %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
|
||||
; %ext = zext <64 x i8> %load to <64 x i64>
|
||||
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
|
||||
; ret void
|
||||
; }
|
||||
|
||||
; XFUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i64:
|
||||
; define amdgpu_kernel void @constant_sextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
|
||||
; %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
|
||||
; define amdgpu_kernel void @constant_sextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
|
||||
; %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
|
||||
; %ext = sext <64 x i8> %load to <64 x i64>
|
||||
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
|
||||
; ret void
|
||||
@ -744,8 +744,8 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(<32 x i64> addrspac
|
||||
|
||||
; GCN-HSA: flat_load_ubyte v[[VAL:[0-9]+]],
|
||||
; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
|
||||
define amdgpu_kernel void @constant_zextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
|
||||
%a = load i8, i8 addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
|
||||
%a = load i8, i8 addrspace(4)* %in
|
||||
%ext = zext i8 %a to i16
|
||||
store i16 %ext, i16 addrspace(1)* %out
|
||||
ret void
|
||||
@ -759,16 +759,16 @@ define amdgpu_kernel void @constant_zextload_i8_to_i16(i16 addrspace(1)* %out, i
|
||||
; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
|
||||
|
||||
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
|
||||
define amdgpu_kernel void @constant_sextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
|
||||
%a = load i8, i8 addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
|
||||
%a = load i8, i8 addrspace(4)* %in
|
||||
%ext = sext i8 %a to i16
|
||||
store i16 %ext, i16 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i16:
|
||||
define amdgpu_kernel void @constant_zextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <1 x i8>, <1 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <1 x i8>, <1 x i8> addrspace(4)* %in
|
||||
%ext = zext <1 x i8> %load to <1 x i16>
|
||||
store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
@ -778,8 +778,8 @@ define amdgpu_kernel void @constant_zextload_v1i8_to_v1i16(<1 x i16> addrspace(1
|
||||
|
||||
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
|
||||
define amdgpu_kernel void @constant_sextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <1 x i8>, <1 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <1 x i8>, <1 x i8> addrspace(4)* %in
|
||||
%ext = sext <1 x i8> %load to <1 x i16>
|
||||
store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
@ -788,8 +788,8 @@ define amdgpu_kernel void @constant_sextload_v1i8_to_v1i16(<1 x i16> addrspace(1
|
||||
; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i16:
|
||||
|
||||
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
|
||||
define amdgpu_kernel void @constant_zextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <2 x i8>, <2 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <2 x i8>, <2 x i8> addrspace(4)* %in
|
||||
%ext = zext <2 x i8> %load to <2 x i16>
|
||||
store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
@ -800,8 +800,8 @@ define amdgpu_kernel void @constant_zextload_v2i8_to_v2i16(<2 x i16> addrspace(1
|
||||
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
|
||||
define amdgpu_kernel void @constant_sextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <2 x i8>, <2 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <2 x i8>, <2 x i8> addrspace(4)* %in
|
||||
%ext = sext <2 x i8> %load to <2 x i16>
|
||||
store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
@ -810,8 +810,8 @@ define amdgpu_kernel void @constant_sextload_v2i8_to_v2i16(<2 x i16> addrspace(1
|
||||
; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i16:
|
||||
|
||||
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
|
||||
define amdgpu_kernel void @constant_zextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <4 x i8>, <4 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <4 x i8>, <4 x i8> addrspace(4)* %in
|
||||
%ext = zext <4 x i8> %load to <4 x i16>
|
||||
store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
@ -824,8 +824,8 @@ define amdgpu_kernel void @constant_zextload_v4i8_to_v4i16(<4 x i16> addrspace(1
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
|
||||
define amdgpu_kernel void @constant_sextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <4 x i8>, <4 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <4 x i8>, <4 x i8> addrspace(4)* %in
|
||||
%ext = sext <4 x i8> %load to <4 x i16>
|
||||
store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
@ -834,8 +834,8 @@ define amdgpu_kernel void @constant_sextload_v4i8_to_v4i16(<4 x i16> addrspace(1
|
||||
; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i16:
|
||||
|
||||
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
|
||||
define amdgpu_kernel void @constant_zextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <8 x i8>, <8 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <8 x i8>, <8 x i8> addrspace(4)* %in
|
||||
%ext = zext <8 x i8> %load to <8 x i16>
|
||||
store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
@ -853,8 +853,8 @@ define amdgpu_kernel void @constant_zextload_v8i8_to_v8i16(<8 x i16> addrspace(1
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
|
||||
|
||||
define amdgpu_kernel void @constant_sextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <8 x i8>, <8 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <8 x i8>, <8 x i8> addrspace(4)* %in
|
||||
%ext = sext <8 x i8> %load to <8 x i16>
|
||||
store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
@ -863,8 +863,8 @@ define amdgpu_kernel void @constant_sextload_v8i8_to_v8i16(<8 x i16> addrspace(1
|
||||
; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i16:
|
||||
|
||||
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
|
||||
define amdgpu_kernel void @constant_zextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <16 x i8>, <16 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <16 x i8>, <16 x i8> addrspace(4)* %in
|
||||
%ext = zext <16 x i8> %load to <16 x i16>
|
||||
store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
@ -889,8 +889,8 @@ define amdgpu_kernel void @constant_zextload_v16i8_to_v16i16(<16 x i16> addrspac
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
|
||||
define amdgpu_kernel void @constant_sextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <16 x i8>, <16 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <16 x i8>, <16 x i8> addrspace(4)* %in
|
||||
%ext = sext <16 x i8> %load to <16 x i16>
|
||||
store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
@ -900,8 +900,8 @@ define amdgpu_kernel void @constant_sextload_v16i8_to_v16i16(<16 x i16> addrspac
|
||||
|
||||
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
|
||||
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
|
||||
define amdgpu_kernel void @constant_zextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <32 x i8>, <32 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_zextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <32 x i8>, <32 x i8> addrspace(4)* %in
|
||||
%ext = zext <32 x i8> %load to <32 x i16>
|
||||
store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
@ -943,24 +943,24 @@ define amdgpu_kernel void @constant_zextload_v32i8_to_v32i16(<32 x i16> addrspac
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
|
||||
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
|
||||
define amdgpu_kernel void @constant_sextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
|
||||
%load = load <32 x i8>, <32 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_sextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
|
||||
%load = load <32 x i8>, <32 x i8> addrspace(4)* %in
|
||||
%ext = sext <32 x i8> %load to <32 x i16>
|
||||
store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; XFUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i16:
|
||||
; define amdgpu_kernel void @constant_zextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
|
||||
; %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
|
||||
; define amdgpu_kernel void @constant_zextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
|
||||
; %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
|
||||
; %ext = zext <64 x i8> %load to <64 x i16>
|
||||
; store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
|
||||
; ret void
|
||||
; }
|
||||
|
||||
; XFUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i16:
|
||||
; define amdgpu_kernel void @constant_sextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
|
||||
; %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
|
||||
; define amdgpu_kernel void @constant_sextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
|
||||
; %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
|
||||
; %ext = sext <64 x i8> %load to <64 x i16>
|
||||
; store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
|
||||
; ret void
|
||||
|
@ -473,10 +473,10 @@ entry:
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
|
||||
; VI: flat_load_ushort
|
||||
define void @load_constant_hi_v2i16_reglo_vreg(i16 addrspace(2)* %in, i16 %reg) #0 {
|
||||
define void @load_constant_hi_v2i16_reglo_vreg(i16 addrspace(4)* %in, i16 %reg) #0 {
|
||||
entry:
|
||||
%gep = getelementptr inbounds i16, i16 addrspace(2)* %in, i64 -2047
|
||||
%load = load i16, i16 addrspace(2)* %gep
|
||||
%gep = getelementptr inbounds i16, i16 addrspace(4)* %in, i64 -2047
|
||||
%load = load i16, i16 addrspace(4)* %gep
|
||||
%build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
|
||||
%build1 = insertelement <2 x i16> %build0, i16 %load, i32 1
|
||||
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
|
||||
@ -492,10 +492,10 @@ entry:
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
|
||||
; VI: flat_load_ushort
|
||||
define void @load_constant_hi_v2f16_reglo_vreg(half addrspace(2)* %in, half %reg) #0 {
|
||||
define void @load_constant_hi_v2f16_reglo_vreg(half addrspace(4)* %in, half %reg) #0 {
|
||||
entry:
|
||||
%gep = getelementptr inbounds half, half addrspace(2)* %in, i64 -2047
|
||||
%load = load half, half addrspace(2)* %gep
|
||||
%gep = getelementptr inbounds half, half addrspace(4)* %in, i64 -2047
|
||||
%load = load half, half addrspace(4)* %gep
|
||||
%build0 = insertelement <2 x half> undef, half %reg, i32 0
|
||||
%build1 = insertelement <2 x half> %build0, half %load, i32 1
|
||||
store <2 x half> %build1, <2 x half> addrspace(1)* undef
|
||||
@ -625,11 +625,11 @@ entry:
|
||||
; GFX9-NEXT: s_waitcnt
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, v2
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
define <2 x i16> @load_constant_v2i16_split(i16 addrspace(2)* %in) #0 {
|
||||
define <2 x i16> @load_constant_v2i16_split(i16 addrspace(4)* %in) #0 {
|
||||
entry:
|
||||
%gep = getelementptr inbounds i16, i16 addrspace(2)* %in, i64 1
|
||||
%load0 = load volatile i16, i16 addrspace(2)* %in
|
||||
%load1 = load volatile i16, i16 addrspace(2)* %gep
|
||||
%gep = getelementptr inbounds i16, i16 addrspace(4)* %in, i64 1
|
||||
%load0 = load volatile i16, i16 addrspace(4)* %in
|
||||
%load1 = load volatile i16, i16 addrspace(4)* %gep
|
||||
%build0 = insertelement <2 x i16> undef, i16 %load0, i32 0
|
||||
%build1 = insertelement <2 x i16> %build0, i16 %load1, i32 1
|
||||
ret <2 x i16> %build1
|
||||
|
@ -559,11 +559,11 @@ entry:
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
|
||||
; VI: flat_load_ushort
|
||||
define void @load_constant_lo_v2i16_reglo_vreg(i16 addrspace(2)* %in, i32 %reg) #0 {
|
||||
define void @load_constant_lo_v2i16_reglo_vreg(i16 addrspace(4)* %in, i32 %reg) #0 {
|
||||
entry:
|
||||
%reg.bc = bitcast i32 %reg to <2 x i16>
|
||||
%gep = getelementptr inbounds i16, i16 addrspace(2)* %in, i64 -2047
|
||||
%load = load i16, i16 addrspace(2)* %gep
|
||||
%gep = getelementptr inbounds i16, i16 addrspace(4)* %in, i64 -2047
|
||||
%load = load i16, i16 addrspace(4)* %gep
|
||||
%build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
|
||||
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
|
||||
ret void
|
||||
@ -578,11 +578,11 @@ entry:
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
|
||||
; VI: flat_load_ushort
|
||||
define void @load_constant_lo_v2f16_reglo_vreg(half addrspace(2)* %in, i32 %reg) #0 {
|
||||
define void @load_constant_lo_v2f16_reglo_vreg(half addrspace(4)* %in, i32 %reg) #0 {
|
||||
entry:
|
||||
%reg.bc = bitcast i32 %reg to <2 x half>
|
||||
%gep = getelementptr inbounds half, half addrspace(2)* %in, i64 -2047
|
||||
%load = load half, half addrspace(2)* %gep
|
||||
%gep = getelementptr inbounds half, half addrspace(4)* %in, i64 -2047
|
||||
%load = load half, half addrspace(4)* %gep
|
||||
%build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
|
||||
store <2 x half> %build1, <2 x half> addrspace(1)* undef
|
||||
ret void
|
||||
|
@ -5,17 +5,17 @@
|
||||
|
||||
declare i32 @llvm.amdgcn.workgroup.id.x() #0
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
|
||||
declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
|
||||
|
||||
; GCN-LABEL: {{^}}get_global_id_0:
|
||||
; GCN: s_and_b32 [[WGSIZEX:s[0-9]+]], {{s[0-9]+}}, 0xffff
|
||||
; GCN: v_mov_b32_e32 [[VWGSIZEX:v[0-9]+]], [[WGSIZEX]]
|
||||
; GCN: v_mad_u32_u24 v{{[0-9]+}}, s8, [[VWGSIZEX]], v0
|
||||
define amdgpu_kernel void @get_global_id_0(i32 addrspace(1)* %out) #1 {
|
||||
%dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
|
||||
%cast.dispatch.ptr = bitcast i8 addrspace(2)* %dispatch.ptr to i32 addrspace(2)*
|
||||
%gep = getelementptr inbounds i32, i32 addrspace(2)* %cast.dispatch.ptr, i64 1
|
||||
%workgroup.size.xy = load i32, i32 addrspace(2)* %gep, align 4, !invariant.load !0
|
||||
%dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
|
||||
%cast.dispatch.ptr = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
|
||||
%gep = getelementptr inbounds i32, i32 addrspace(4)* %cast.dispatch.ptr, i64 1
|
||||
%workgroup.size.xy = load i32, i32 addrspace(4)* %gep, align 4, !invariant.load !0
|
||||
%workgroup.size.x = and i32 %workgroup.size.xy, 65535
|
||||
|
||||
%workitem.id.x = call i32 @llvm.amdgcn.workitem.id.x(), !range !1
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s
|
||||
|
||||
@ptr_load = addrspace(3) global i32 addrspace(2)* undef, align 8
|
||||
@ptr_load = addrspace(3) global i32 addrspace(4)* undef, align 8
|
||||
|
||||
; Make sure when the load from %ptr2 is folded the chain isn't lost,
|
||||
; resulting in losing the store to gptr
|
||||
@ -16,11 +16,11 @@
|
||||
; SI: buffer_store_dword
|
||||
; SI: s_endpgm
|
||||
define amdgpu_kernel void @missing_store_reduced(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
|
||||
%ptr0 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(3)* @ptr_load, align 8
|
||||
%ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 2
|
||||
%ptr0 = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(3)* @ptr_load, align 8
|
||||
%ptr2 = getelementptr inbounds i32, i32 addrspace(4)* %ptr0, i64 2
|
||||
|
||||
store i32 99, i32 addrspace(1)* %gptr, align 4
|
||||
%tmp2 = load i32, i32 addrspace(2)* %ptr2, align 4
|
||||
%tmp2 = load i32, i32 addrspace(4)* %ptr2, align 4
|
||||
|
||||
store i32 %tmp2, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
|
@ -5,40 +5,40 @@
|
||||
|
||||
; CHECK-LABEL: {{^}}test_none:
|
||||
; CHECK: buffer_load_format_x v0, off, {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
|
||||
define amdgpu_vs float @test_none(<4 x i32> addrspace(2)* inreg %base, i32 %i) {
|
||||
define amdgpu_vs float @test_none(<4 x i32> addrspace(4)* inreg %base, i32 %i) {
|
||||
main_body:
|
||||
%ptr = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %base, i32 %i
|
||||
%tmp2 = load <4 x i32>, <4 x i32> addrspace(2)* %ptr, align 32
|
||||
%ptr = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %base, i32 %i
|
||||
%tmp2 = load <4 x i32>, <4 x i32> addrspace(4)* %ptr, align 32
|
||||
%tmp7 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %tmp2, i32 0, i32 0, i1 0, i1 0)
|
||||
ret float %tmp7
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}test_idxen:
|
||||
; CHECK: buffer_load_format_x v0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen{{$}}
|
||||
define amdgpu_vs float @test_idxen(<4 x i32> addrspace(2)* inreg %base, i32 %i) {
|
||||
define amdgpu_vs float @test_idxen(<4 x i32> addrspace(4)* inreg %base, i32 %i) {
|
||||
main_body:
|
||||
%ptr = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %base, i32 %i
|
||||
%tmp2 = load <4 x i32>, <4 x i32> addrspace(2)* %ptr, align 32
|
||||
%ptr = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %base, i32 %i
|
||||
%tmp2 = load <4 x i32>, <4 x i32> addrspace(4)* %ptr, align 32
|
||||
%tmp7 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %tmp2, i32 undef, i32 0, i1 0, i1 0)
|
||||
ret float %tmp7
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}test_offen:
|
||||
; CHECK: buffer_load_format_x v0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}
|
||||
define amdgpu_vs float @test_offen(<4 x i32> addrspace(2)* inreg %base, i32 %i) {
|
||||
define amdgpu_vs float @test_offen(<4 x i32> addrspace(4)* inreg %base, i32 %i) {
|
||||
main_body:
|
||||
%ptr = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %base, i32 %i
|
||||
%tmp2 = load <4 x i32>, <4 x i32> addrspace(2)* %ptr, align 32
|
||||
%ptr = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %base, i32 %i
|
||||
%tmp2 = load <4 x i32>, <4 x i32> addrspace(4)* %ptr, align 32
|
||||
%tmp7 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %tmp2, i32 0, i32 undef, i1 0, i1 0)
|
||||
ret float %tmp7
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}test_both:
|
||||
; CHECK: buffer_load_format_x v0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen offen{{$}}
|
||||
define amdgpu_vs float @test_both(<4 x i32> addrspace(2)* inreg %base, i32 %i) {
|
||||
define amdgpu_vs float @test_both(<4 x i32> addrspace(4)* inreg %base, i32 %i) {
|
||||
main_body:
|
||||
%ptr = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %base, i32 %i
|
||||
%tmp2 = load <4 x i32>, <4 x i32> addrspace(2)* %ptr, align 32
|
||||
%ptr = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %base, i32 %i
|
||||
%tmp2 = load <4 x i32>, <4 x i32> addrspace(4)* %ptr, align 32
|
||||
%tmp7 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %tmp2, i32 undef, i32 undef, i1 0, i1 0)
|
||||
ret float %tmp7
|
||||
}
|
||||
|
@ -55,10 +55,10 @@ entry:
|
||||
|
||||
; CHECK-LABEL: {{^}}soffset_max_imm:
|
||||
; CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 64 offen glc
|
||||
define amdgpu_gs void @soffset_max_imm([6 x <4 x i32>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) {
|
||||
define amdgpu_gs void @soffset_max_imm([6 x <4 x i32>] addrspace(4)* byval, [17 x <4 x i32>] addrspace(4)* byval, [16 x <4 x i32>] addrspace(4)* byval, [32 x <8 x i32>] addrspace(4)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) {
|
||||
main_body:
|
||||
%tmp0 = getelementptr [6 x <4 x i32>], [6 x <4 x i32>] addrspace(2)* %0, i32 0, i32 0
|
||||
%tmp1 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp0
|
||||
%tmp0 = getelementptr [6 x <4 x i32>], [6 x <4 x i32>] addrspace(4)* %0, i32 0, i32 0
|
||||
%tmp1 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp0
|
||||
%tmp2 = shl i32 %6, 2
|
||||
%tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32> %tmp1, i32 %tmp2, i32 64, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
|
||||
%tmp4 = add i32 %6, 16
|
||||
@ -74,10 +74,10 @@ main_body:
|
||||
; CHECK-LABEL: {{^}}soffset_no_fold:
|
||||
; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x41
|
||||
; CHECK: buffer_load_dword v{{[0-9+]}}, v{{[0-9+]}}, s[{{[0-9]+}}:{{[0-9]+}}], [[SOFFSET]] offen glc
|
||||
define amdgpu_gs void @soffset_no_fold([6 x <4 x i32>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) {
|
||||
define amdgpu_gs void @soffset_no_fold([6 x <4 x i32>] addrspace(4)* byval, [17 x <4 x i32>] addrspace(4)* byval, [16 x <4 x i32>] addrspace(4)* byval, [32 x <8 x i32>] addrspace(4)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) {
|
||||
main_body:
|
||||
%tmp0 = getelementptr [6 x <4 x i32>], [6 x <4 x i32>] addrspace(2)* %0, i32 0, i32 0
|
||||
%tmp1 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp0
|
||||
%tmp0 = getelementptr [6 x <4 x i32>], [6 x <4 x i32>] addrspace(4)* %0, i32 0, i32 0
|
||||
%tmp1 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp0
|
||||
%tmp2 = shl i32 %6, 2
|
||||
%tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32> %tmp1, i32 %tmp2, i32 65, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
|
||||
%tmp4 = add i32 %6, 16
|
||||
|
@ -642,12 +642,12 @@ uniform.multi.exit.region:
|
||||
br i1 %uniform.cond0, label %uniform.if, label %uniform.ret1
|
||||
|
||||
uniform.if:
|
||||
%sgpr0 = load volatile i32, i32 addrspace(2)* undef
|
||||
%sgpr0 = load volatile i32, i32 addrspace(4)* undef
|
||||
%uniform.cond1 = icmp slt i32 %sgpr0, 1
|
||||
br i1 %uniform.cond1, label %uniform.then, label %uniform.endif
|
||||
|
||||
uniform.then:
|
||||
%sgpr1 = load volatile i32, i32 addrspace(2)* undef
|
||||
%sgpr1 = load volatile i32, i32 addrspace(4)* undef
|
||||
%uniform.cond2 = icmp sge i32 %sgpr1, 4
|
||||
store volatile i32 33, i32 addrspace(1)* undef
|
||||
br i1 %uniform.cond2, label %uniform.ret0, label %uniform.endif
|
||||
|
@ -6,21 +6,21 @@
|
||||
; EG: R_AMDGPU_ABS32 extern_const_addrspace
|
||||
|
||||
; CHECK-DAG: Name: extern_const_addrspace
|
||||
@extern_const_addrspace = external unnamed_addr addrspace(2) constant [5 x i32], align 4
|
||||
@extern_const_addrspace = external unnamed_addr addrspace(4) constant [5 x i32], align 4
|
||||
|
||||
; CHECK-DAG: Name: load_extern_const_init
|
||||
define amdgpu_kernel void @load_extern_const_init(i32 addrspace(1)* %out) nounwind {
|
||||
%val = load i32, i32 addrspace(2)* getelementptr ([5 x i32], [5 x i32] addrspace(2)* @extern_const_addrspace, i64 0, i64 3), align 4
|
||||
%val = load i32, i32 addrspace(4)* getelementptr ([5 x i32], [5 x i32] addrspace(4)* @extern_const_addrspace, i64 0, i64 3), align 4
|
||||
store i32 %val, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-DAG: Name: undef_const_addrspace
|
||||
@undef_const_addrspace = unnamed_addr addrspace(2) constant [5 x i32] undef, align 4
|
||||
@undef_const_addrspace = unnamed_addr addrspace(4) constant [5 x i32] undef, align 4
|
||||
|
||||
; CHECK-DAG: Name: undef_const_addrspace
|
||||
define amdgpu_kernel void @load_undef_const_init(i32 addrspace(1)* %out) nounwind {
|
||||
%val = load i32, i32 addrspace(2)* getelementptr ([5 x i32], [5 x i32] addrspace(2)* @undef_const_addrspace, i64 0, i64 3), align 4
|
||||
%val = load i32, i32 addrspace(4)* getelementptr ([5 x i32], [5 x i32] addrspace(4)* @undef_const_addrspace, i64 0, i64 3), align 4
|
||||
store i32 %val, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
@ -194,9 +194,9 @@ define amdgpu_kernel void @truncate_buffer_load_i64_to_i8(i8 addrspace(1)* %out,
|
||||
; SI: s_load_dword [[LOAD:s[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0x0
|
||||
; SI: s_waitcnt lgkmcnt(0)
|
||||
; SI: s_and_b32 s{{[0-9]+}}, [[LOAD]], 0xffff
|
||||
define amdgpu_kernel void @smrd_mask_i32_to_i16(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
|
||||
define amdgpu_kernel void @smrd_mask_i32_to_i16(i32 addrspace(1)* %out, i32 addrspace(4)* %in) {
|
||||
entry:
|
||||
%val = load i32, i32 addrspace(2)* %in
|
||||
%val = load i32, i32 addrspace(4)* %in
|
||||
%mask = and i32 %val, 65535
|
||||
store i32 %mask, i32 addrspace(1)* %out
|
||||
ret void
|
||||
|
@ -1,7 +1,7 @@
|
||||
;RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs | FileCheck -check-prefixes=CHECK,GCN %s
|
||||
;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -verify-machineinstrs | FileCheck -check-prefixes=CHECK,R600 %s
|
||||
|
||||
%struct.S = type { i32 addrspace(5)*, i32 addrspace(1)*, i32 addrspace(2)*, i32 addrspace(3)*, i32*, i32 addrspace(4)*}
|
||||
%struct.S = type { i32 addrspace(5)*, i32 addrspace(1)*, i32 addrspace(4)*, i32 addrspace(3)*, i32*, i32 addrspace(2)*}
|
||||
|
||||
; CHECK-LABEL: nullptr_priv:
|
||||
; CHECK-NEXT: .long 0
|
||||
@ -15,7 +15,7 @@
|
||||
; CHECK-LABEL: nullptr_const:
|
||||
; GCN-NEXT: .quad 0
|
||||
; R600-NEXT: .long 0
|
||||
@nullptr_const = global i32 addrspace(2)* addrspacecast (i32* null to i32 addrspace(2)*)
|
||||
@nullptr_const = global i32 addrspace(4)* addrspacecast (i32* null to i32 addrspace(4)*)
|
||||
|
||||
; CHECK-LABEL: nullptr_local:
|
||||
; CHECK-NEXT: .long -1
|
||||
@ -23,7 +23,7 @@
|
||||
|
||||
; CHECK-LABEL: nullptr_region:
|
||||
; CHECK-NEXT: .long -1
|
||||
@nullptr_region = global i32 addrspace(4)* addrspacecast (i32* null to i32 addrspace(4)*)
|
||||
@nullptr_region = global i32 addrspace(2)* addrspacecast (i32* null to i32 addrspace(2)*)
|
||||
|
||||
; CHECK-LABEL: nullptr6:
|
||||
; R600-NEXT: .long 0
|
||||
@ -113,7 +113,7 @@
|
||||
@structWithPointers = addrspace(1) global %struct.S {
|
||||
i32 addrspace(5)* addrspacecast (i32* null to i32 addrspace(5)*),
|
||||
i32 addrspace(1)* addrspacecast (i32* null to i32 addrspace(1)*),
|
||||
i32 addrspace(2)* addrspacecast (i32* null to i32 addrspace(2)*),
|
||||
i32 addrspace(4)* addrspacecast (i32* null to i32 addrspace(4)*),
|
||||
i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*),
|
||||
i32* null,
|
||||
i32 addrspace(4)* addrspacecast (i32* null to i32 addrspace(4)*)}, align 4
|
||||
i32 addrspace(2)* addrspacecast (i32* null to i32 addrspace(2)*)}, align 4
|
||||
|
@ -8,9 +8,9 @@
|
||||
; GFX9: s_load_dword [[VAL1:s[0-9]+]]
|
||||
; GFX9: s_pack_ll_b32_b16 [[PACKED:s[0-9]+]], [[VAL0]], [[VAL1]]
|
||||
; GFX9: ; use [[PACKED]]
|
||||
define amdgpu_kernel void @s_pack_v2f16(i32 addrspace(2)* %in0, i32 addrspace(2)* %in1) #0 {
|
||||
%val0 = load volatile i32, i32 addrspace(2)* %in0
|
||||
%val1 = load volatile i32, i32 addrspace(2)* %in1
|
||||
define amdgpu_kernel void @s_pack_v2f16(i32 addrspace(4)* %in0, i32 addrspace(4)* %in1) #0 {
|
||||
%val0 = load volatile i32, i32 addrspace(4)* %in0
|
||||
%val1 = load volatile i32, i32 addrspace(4)* %in1
|
||||
%lo.i = trunc i32 %val0 to i16
|
||||
%hi.i = trunc i32 %val1 to i16
|
||||
%lo = bitcast i16 %lo.i to half
|
||||
@ -27,8 +27,8 @@ define amdgpu_kernel void @s_pack_v2f16(i32 addrspace(2)* %in0, i32 addrspace(2)
|
||||
; GFX9: s_load_dword [[VAL1:s[0-9]+]]
|
||||
; GFX9: s_pack_ll_b32_b16 [[PACKED:s[0-9]+]], 0x1234, [[VAL1]]
|
||||
; GFX9: ; use [[PACKED]]
|
||||
define amdgpu_kernel void @s_pack_v2f16_imm_lo(i32 addrspace(2)* %in1) #0 {
|
||||
%val1 = load i32, i32 addrspace(2)* %in1
|
||||
define amdgpu_kernel void @s_pack_v2f16_imm_lo(i32 addrspace(4)* %in1) #0 {
|
||||
%val1 = load i32, i32 addrspace(4)* %in1
|
||||
%hi.i = trunc i32 %val1 to i16
|
||||
%hi = bitcast i16 %hi.i to half
|
||||
%vec.0 = insertelement <2 x half> undef, half 0xH1234, i32 0
|
||||
@ -43,8 +43,8 @@ define amdgpu_kernel void @s_pack_v2f16_imm_lo(i32 addrspace(2)* %in1) #0 {
|
||||
; GFX9: s_load_dword [[VAL0:s[0-9]+]]
|
||||
; GFX9: s_pack_ll_b32_b16 [[PACKED:s[0-9]+]], [[VAL0]], 0x1234
|
||||
; GFX9: ; use [[PACKED]]
|
||||
define amdgpu_kernel void @s_pack_v2f16_imm_hi(i32 addrspace(2)* %in0) #0 {
|
||||
%val0 = load i32, i32 addrspace(2)* %in0
|
||||
define amdgpu_kernel void @s_pack_v2f16_imm_hi(i32 addrspace(4)* %in0) #0 {
|
||||
%val0 = load i32, i32 addrspace(4)* %in0
|
||||
%lo.i = trunc i32 %val0 to i16
|
||||
%lo = bitcast i16 %lo.i to half
|
||||
%vec.0 = insertelement <2 x half> undef, half %lo, i32 0
|
||||
|
@ -8,9 +8,9 @@
|
||||
; GFX9: s_load_dword [[VAL1:s[0-9]+]]
|
||||
; GFX9: s_pack_ll_b32_b16 [[PACKED:s[0-9]+]], [[VAL0]], [[VAL1]]
|
||||
; GFX9: ; use [[PACKED]]
|
||||
define amdgpu_kernel void @s_pack_v2i16(i32 addrspace(2)* %in0, i32 addrspace(2)* %in1) #0 {
|
||||
%val0 = load volatile i32, i32 addrspace(2)* %in0
|
||||
%val1 = load volatile i32, i32 addrspace(2)* %in1
|
||||
define amdgpu_kernel void @s_pack_v2i16(i32 addrspace(4)* %in0, i32 addrspace(4)* %in1) #0 {
|
||||
%val0 = load volatile i32, i32 addrspace(4)* %in0
|
||||
%val1 = load volatile i32, i32 addrspace(4)* %in1
|
||||
%lo = trunc i32 %val0 to i16
|
||||
%hi = trunc i32 %val1 to i16
|
||||
%vec.0 = insertelement <2 x i16> undef, i16 %lo, i32 0
|
||||
@ -25,8 +25,8 @@ define amdgpu_kernel void @s_pack_v2i16(i32 addrspace(2)* %in0, i32 addrspace(2)
|
||||
; GFX9: s_load_dword [[VAL1:s[0-9]+]]
|
||||
; GFX9: s_pack_ll_b32_b16 [[PACKED:s[0-9]+]], 0x1c8, [[VAL1]]
|
||||
; GFX9: ; use [[PACKED]]
|
||||
define amdgpu_kernel void @s_pack_v2i16_imm_lo(i32 addrspace(2)* %in1) #0 {
|
||||
%val1 = load i32, i32 addrspace(2)* %in1
|
||||
define amdgpu_kernel void @s_pack_v2i16_imm_lo(i32 addrspace(4)* %in1) #0 {
|
||||
%val1 = load i32, i32 addrspace(4)* %in1
|
||||
%hi = trunc i32 %val1 to i16
|
||||
%vec.0 = insertelement <2 x i16> undef, i16 456, i32 0
|
||||
%vec.1 = insertelement <2 x i16> %vec.0, i16 %hi, i32 1
|
||||
@ -40,8 +40,8 @@ define amdgpu_kernel void @s_pack_v2i16_imm_lo(i32 addrspace(2)* %in1) #0 {
|
||||
; GFX9: s_load_dword [[VAL0:s[0-9]+]]
|
||||
; GFX9: s_pack_ll_b32_b16 [[PACKED:s[0-9]+]], [[VAL0]], 0x1c8
|
||||
; GFX9: ; use [[PACKED]]
|
||||
define amdgpu_kernel void @s_pack_v2i16_imm_hi(i32 addrspace(2)* %in0) #0 {
|
||||
%val0 = load i32, i32 addrspace(2)* %in0
|
||||
define amdgpu_kernel void @s_pack_v2i16_imm_hi(i32 addrspace(4)* %in0) #0 {
|
||||
%val0 = load i32, i32 addrspace(4)* %in0
|
||||
%lo = trunc i32 %val0 to i16
|
||||
%vec.0 = insertelement <2 x i16> undef, i16 %lo, i32 0
|
||||
%vec.1 = insertelement <2 x i16> %vec.0, i16 456, i32 1
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -filetype=obj -march=r600 -mcpu=cypress -verify-machineinstrs < %s | llvm-readobj -relocations -symbols | FileCheck %s
|
||||
|
||||
@arr = internal unnamed_addr addrspace(2) constant [4 x i32] [i32 4, i32 5, i32 6, i32 7], align 4
|
||||
@arr = internal unnamed_addr addrspace(4) constant [4 x i32] [i32 4, i32 5, i32 6, i32 7], align 4
|
||||
|
||||
; CHECK: Relocations [
|
||||
; CHECK: Section (3) .rel.text {
|
||||
@ -19,8 +19,8 @@
|
||||
; CHECK: }
|
||||
define amdgpu_kernel void @test_constant_array_fixup(i32 addrspace(1)* nocapture %out, i32 %idx) #0 {
|
||||
entry:
|
||||
%arrayidx = getelementptr inbounds [4 x i32], [4 x i32] addrspace(2)* @arr, i32 0, i32 %idx
|
||||
%val = load i32, i32 addrspace(2)* %arrayidx
|
||||
%arrayidx = getelementptr inbounds [4 x i32], [4 x i32] addrspace(4)* @arr, i32 0, i32 %idx
|
||||
%val = load i32, i32 addrspace(4)* %arrayidx
|
||||
store i32 %val, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
@ -28,9 +28,9 @@ define amdgpu_kernel void @test_readcyclecounter(i64 addrspace(1)* %out) #0 {
|
||||
; SI-DAG: s_memtime
|
||||
; VI-DAG: s_memrealtime
|
||||
; GCN-DAG: s_load_dword
|
||||
define amdgpu_cs i32 @test_readcyclecounter_smem(i64 addrspace(2)* inreg %in) #0 {
|
||||
define amdgpu_cs i32 @test_readcyclecounter_smem(i64 addrspace(4)* inreg %in) #0 {
|
||||
%cycle0 = call i64 @llvm.readcyclecounter()
|
||||
%in.v = load i64, i64 addrspace(2)* %in
|
||||
%in.v = load i64, i64 addrspace(4)* %in
|
||||
%r.64 = add i64 %cycle0, %in.v
|
||||
%r.32 = trunc i64 %r.64 to i32
|
||||
ret i32 %r.32
|
||||
|
@ -7,7 +7,7 @@
|
||||
; GCN: s_waitcnt expcnt(0)
|
||||
; GCN: v_add_f32_e32 v0, 1.0, v0
|
||||
; GCN-NOT: s_endpgm
|
||||
define amdgpu_vs { float, float } @vgpr([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
|
||||
define amdgpu_vs { float, float } @vgpr([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
|
||||
bb:
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
|
||||
%x = fadd float %arg3, 1.000000e+00
|
||||
@ -26,7 +26,7 @@ bb:
|
||||
; GCN-DAG: v_mov_b32_e32 v3, -1.0
|
||||
; GCN: s_waitcnt expcnt(0)
|
||||
; GCN-NOT: s_endpgm
|
||||
define amdgpu_vs { float, float, float, float } @vgpr_literal([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
|
||||
define amdgpu_vs { float, float, float, float } @vgpr_literal([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
|
||||
bb:
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
|
||||
ret { float, float, float, float } { float 1.000000e+00, float 2.000000e+00, float 4.000000e+00, float -1.000000e+00 }
|
||||
@ -43,7 +43,7 @@ bb:
|
||||
; GCN: v_mov_b32_e32 v3, v4
|
||||
; GCN: v_mov_b32_e32 v4, v6
|
||||
; GCN-NOT: s_endpgm
|
||||
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr0([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
|
||||
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr0([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
|
||||
bb:
|
||||
%i0 = extractelement <2 x i32> %arg4, i32 0
|
||||
%i1 = extractelement <2 x i32> %arg4, i32 1
|
||||
@ -68,7 +68,7 @@ bb:
|
||||
; GCN-LABEL: {{^}}ps_input_ena_no_inputs:
|
||||
; GCN: v_mov_b32_e32 v0, 1.0
|
||||
; GCN-NOT: s_endpgm
|
||||
define amdgpu_ps float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
|
||||
define amdgpu_ps float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
|
||||
bb:
|
||||
ret float 1.000000e+00
|
||||
}
|
||||
@ -82,7 +82,7 @@ bb:
|
||||
; GCN-DAG: v_mov_b32_e32 v1, v2
|
||||
; GCN: v_mov_b32_e32 v2, v3
|
||||
; GCN-NOT: s_endpgm
|
||||
define amdgpu_ps { float, <2 x float> } @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
|
||||
define amdgpu_ps { float, <2 x float> } @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
|
||||
bb:
|
||||
%f = bitcast <2 x i32> %arg8 to <2 x float>
|
||||
%s = insertvalue { float, <2 x float> } undef, float %arg14, 0
|
||||
@ -101,7 +101,7 @@ bb:
|
||||
; GCN-DAG: v_mov_b32_e32 v3, v6
|
||||
; GCN-DAG: v_mov_b32_e32 v4, v8
|
||||
; GCN-NOT: s_endpgm
|
||||
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr1([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #2 {
|
||||
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr1([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #2 {
|
||||
bb:
|
||||
%i0 = extractelement <2 x i32> %arg4, i32 0
|
||||
%i1 = extractelement <2 x i32> %arg4, i32 1
|
||||
@ -130,7 +130,7 @@ bb:
|
||||
; GCN: v_mov_b32_e32 v3, v8
|
||||
; GCN: v_mov_b32_e32 v4, v12
|
||||
; GCN-NOT: s_endpgm
|
||||
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr119([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #3 {
|
||||
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr119([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #3 {
|
||||
bb:
|
||||
%i0 = extractelement <2 x i32> %arg4, i32 0
|
||||
%i1 = extractelement <2 x i32> %arg4, i32 1
|
||||
@ -159,7 +159,7 @@ bb:
|
||||
; GCN: v_mov_b32_e32 v3, v4
|
||||
; GCN: v_mov_b32_e32 v4, v8
|
||||
; GCN-NOT: s_endpgm
|
||||
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr418([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #4 {
|
||||
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr418([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #4 {
|
||||
bb:
|
||||
%i0 = extractelement <2 x i32> %arg4, i32 0
|
||||
%i1 = extractelement <2 x i32> %arg4, i32 1
|
||||
@ -181,7 +181,7 @@ bb:
|
||||
; GCN: s_add_i32 s0, s3, 2
|
||||
; GCN: s_mov_b32 s2, s3
|
||||
; GCN-NOT: s_endpgm
|
||||
define amdgpu_vs { i32, i32, i32 } @sgpr([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
|
||||
define amdgpu_vs { i32, i32, i32 } @sgpr([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
|
||||
bb:
|
||||
%x = add i32 %arg2, 2
|
||||
%a = insertvalue { i32, i32, i32 } undef, i32 %x, 0
|
||||
@ -197,7 +197,7 @@ bb:
|
||||
; GCN-DAG: s_mov_b32 s2, 7
|
||||
; GCN-DAG: s_mov_b32 s3, 8
|
||||
; GCN-NOT: s_endpgm
|
||||
define amdgpu_vs { i32, i32, i32, i32 } @sgpr_literal([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
|
||||
define amdgpu_vs { i32, i32, i32, i32 } @sgpr_literal([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
|
||||
bb:
|
||||
%x = add i32 %arg2, 2
|
||||
ret { i32, i32, i32, i32 } { i32 5, i32 6, i32 7, i32 8 }
|
||||
@ -212,7 +212,7 @@ bb:
|
||||
; GCN-DAG: s_add_i32 s0, s3, 2
|
||||
; GCN-DAG: s_mov_b32 s2, s3
|
||||
; GCN-NOT: s_endpgm
|
||||
define amdgpu_vs { float, i32, float, i32, i32 } @both([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
|
||||
define amdgpu_vs { float, i32, float, i32, i32 } @both([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
|
||||
bb:
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
|
||||
%v = fadd float %arg3, 1.000000e+00
|
||||
@ -235,7 +235,7 @@ bb:
|
||||
; GCN-DAG: v_mov_b32_e32 v1, 2.0
|
||||
; GCN-DAG: v_mov_b32_e32 v2, 4.0
|
||||
; GCN: s_waitcnt expcnt(0)
|
||||
define amdgpu_vs { { float, i32 }, { i32, <2 x float> } } @structure_literal([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
|
||||
define amdgpu_vs { { float, i32 }, { i32, <2 x float> } } @structure_literal([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
|
||||
bb:
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
|
||||
ret { { float, i32 }, { i32, <2 x float> } } { { float, i32 } { float 1.000000e+00, i32 2 }, { i32, <2 x float> } { i32 3, <2 x float> <float 2.000000e+00, float 4.000000e+00> } }
|
||||
|
@ -65,24 +65,24 @@ done: ; preds = %loop
|
||||
; GCN: v_mov_b32_e32 [[V_OUT:v[0-9]+]], [[OUT]]
|
||||
; GCN-NOHSA: buffer_store_dword [[V_OUT]]
|
||||
; GCN-HSA: flat_store_dword {{.*}}, [[V_OUT]]
|
||||
define amdgpu_kernel void @smrd_valu(i32 addrspace(2)* addrspace(1)* %in, i32 %a, i32 %b, i32 addrspace(1)* %out) #1 {
|
||||
define amdgpu_kernel void @smrd_valu(i32 addrspace(4)* addrspace(1)* %in, i32 %a, i32 %b, i32 addrspace(1)* %out) #1 {
|
||||
entry:
|
||||
%tmp = icmp ne i32 %a, 0
|
||||
br i1 %tmp, label %if, label %else
|
||||
|
||||
if: ; preds = %entry
|
||||
%tmp1 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %in
|
||||
%tmp1 = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(1)* %in
|
||||
br label %endif
|
||||
|
||||
else: ; preds = %entry
|
||||
%tmp2 = getelementptr i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %in
|
||||
%tmp3 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %tmp2
|
||||
%tmp2 = getelementptr i32 addrspace(4)*, i32 addrspace(4)* addrspace(1)* %in
|
||||
%tmp3 = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(1)* %tmp2
|
||||
br label %endif
|
||||
|
||||
endif: ; preds = %else, %if
|
||||
%tmp4 = phi i32 addrspace(2)* [ %tmp1, %if ], [ %tmp3, %else ]
|
||||
%tmp5 = getelementptr i32, i32 addrspace(2)* %tmp4, i32 3000
|
||||
%tmp6 = load i32, i32 addrspace(2)* %tmp5
|
||||
%tmp4 = phi i32 addrspace(4)* [ %tmp1, %if ], [ %tmp3, %else ]
|
||||
%tmp5 = getelementptr i32, i32 addrspace(4)* %tmp4, i32 3000
|
||||
%tmp6 = load i32, i32 addrspace(4)* %tmp5
|
||||
store i32 %tmp6, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -93,12 +93,12 @@ endif: ; preds = %else, %if
|
||||
; GCN-NOHSA-NOT: v_add
|
||||
; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16{{$}}
|
||||
; GCN-HSA: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
|
||||
define amdgpu_kernel void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in) #1 {
|
||||
define amdgpu_kernel void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(4)* %in) #1 {
|
||||
entry:
|
||||
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp1 = add i32 %tmp, 4
|
||||
%tmp2 = getelementptr [8 x i32], [8 x i32] addrspace(2)* %in, i32 %tmp, i32 4
|
||||
%tmp3 = load i32, i32 addrspace(2)* %tmp2
|
||||
%tmp2 = getelementptr [8 x i32], [8 x i32] addrspace(4)* %in, i32 %tmp, i32 4
|
||||
%tmp3 = load i32, i32 addrspace(4)* %tmp2
|
||||
store i32 %tmp3, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -113,12 +113,12 @@ entry:
|
||||
; GCN-NOHSA: buffer_store_dword
|
||||
; GCN-HSA: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
|
||||
; GCN-HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}
|
||||
define amdgpu_kernel void @smrd_valu_ci_offset(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %c) #1 {
|
||||
define amdgpu_kernel void @smrd_valu_ci_offset(i32 addrspace(1)* %out, i32 addrspace(4)* %in, i32 %c) #1 {
|
||||
entry:
|
||||
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp2 = getelementptr i32, i32 addrspace(2)* %in, i32 %tmp
|
||||
%tmp3 = getelementptr i32, i32 addrspace(2)* %tmp2, i32 5000
|
||||
%tmp4 = load i32, i32 addrspace(2)* %tmp3
|
||||
%tmp2 = getelementptr i32, i32 addrspace(4)* %in, i32 %tmp
|
||||
%tmp3 = getelementptr i32, i32 addrspace(4)* %tmp2, i32 5000
|
||||
%tmp4 = load i32, i32 addrspace(4)* %tmp3
|
||||
%tmp5 = add i32 %tmp4, %c
|
||||
store i32 %tmp5, i32 addrspace(1)* %out
|
||||
ret void
|
||||
@ -133,12 +133,12 @@ entry:
|
||||
; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
|
||||
; GCN-NOHSA: buffer_store_dwordx2
|
||||
; GCN-HSA: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
|
||||
define amdgpu_kernel void @smrd_valu_ci_offset_x2(i64 addrspace(1)* %out, i64 addrspace(2)* %in, i64 %c) #1 {
|
||||
define amdgpu_kernel void @smrd_valu_ci_offset_x2(i64 addrspace(1)* %out, i64 addrspace(4)* %in, i64 %c) #1 {
|
||||
entry:
|
||||
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp2 = getelementptr i64, i64 addrspace(2)* %in, i32 %tmp
|
||||
%tmp3 = getelementptr i64, i64 addrspace(2)* %tmp2, i32 5000
|
||||
%tmp4 = load i64, i64 addrspace(2)* %tmp3
|
||||
%tmp2 = getelementptr i64, i64 addrspace(4)* %in, i32 %tmp
|
||||
%tmp3 = getelementptr i64, i64 addrspace(4)* %tmp2, i32 5000
|
||||
%tmp4 = load i64, i64 addrspace(4)* %tmp3
|
||||
%tmp5 = or i64 %tmp4, %c
|
||||
store i64 %tmp5, i64 addrspace(1)* %out
|
||||
ret void
|
||||
@ -155,12 +155,12 @@ entry:
|
||||
; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
|
||||
; GCN-NOHSA: buffer_store_dwordx4
|
||||
; GCN-HSA: flat_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
|
||||
define amdgpu_kernel void @smrd_valu_ci_offset_x4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(2)* %in, <4 x i32> %c) #1 {
|
||||
define amdgpu_kernel void @smrd_valu_ci_offset_x4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(4)* %in, <4 x i32> %c) #1 {
|
||||
entry:
|
||||
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp2 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %in, i32 %tmp
|
||||
%tmp3 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %tmp2, i32 1234
|
||||
%tmp4 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp3
|
||||
%tmp2 = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %in, i32 %tmp
|
||||
%tmp3 = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %tmp2, i32 1234
|
||||
%tmp4 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp3
|
||||
%tmp5 = or <4 x i32> %tmp4, %c
|
||||
store <4 x i32> %tmp5, <4 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -189,12 +189,12 @@ entry:
|
||||
; GCN-NOHSA: buffer_store_dwordx4
|
||||
; GCN-HSA: flat_load_dwordx4
|
||||
; GCN-HSA: flat_load_dwordx4
|
||||
define amdgpu_kernel void @smrd_valu_ci_offset_x8(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(2)* %in, <8 x i32> %c) #1 {
|
||||
define amdgpu_kernel void @smrd_valu_ci_offset_x8(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(4)* %in, <8 x i32> %c) #1 {
|
||||
entry:
|
||||
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp2 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %in, i32 %tmp
|
||||
%tmp3 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %tmp2, i32 1234
|
||||
%tmp4 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp3
|
||||
%tmp2 = getelementptr <8 x i32>, <8 x i32> addrspace(4)* %in, i32 %tmp
|
||||
%tmp3 = getelementptr <8 x i32>, <8 x i32> addrspace(4)* %tmp2, i32 1234
|
||||
%tmp4 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp3
|
||||
%tmp5 = or <8 x i32> %tmp4, %c
|
||||
store <8 x i32> %tmp5, <8 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -230,12 +230,12 @@ entry:
|
||||
; GCN-HSA: flat_load_dwordx4
|
||||
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @smrd_valu_ci_offset_x16(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(2)* %in, <16 x i32> %c) #1 {
|
||||
define amdgpu_kernel void @smrd_valu_ci_offset_x16(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(4)* %in, <16 x i32> %c) #1 {
|
||||
entry:
|
||||
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp2 = getelementptr <16 x i32>, <16 x i32> addrspace(2)* %in, i32 %tmp
|
||||
%tmp3 = getelementptr <16 x i32>, <16 x i32> addrspace(2)* %tmp2, i32 1234
|
||||
%tmp4 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp3
|
||||
%tmp2 = getelementptr <16 x i32>, <16 x i32> addrspace(4)* %in, i32 %tmp
|
||||
%tmp3 = getelementptr <16 x i32>, <16 x i32> addrspace(4)* %tmp2, i32 1234
|
||||
%tmp4 = load <16 x i32>, <16 x i32> addrspace(4)* %tmp3
|
||||
%tmp5 = or <16 x i32> %tmp4, %c
|
||||
store <16 x i32> %tmp5, <16 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
@ -247,12 +247,12 @@ entry:
|
||||
; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, s{{[0-9]+}}, [[MOVED]]
|
||||
; GCN-NOHSA: buffer_store_dword [[ADD]]
|
||||
; GCN-HSA: flat_store_dword {{.*}}, [[ADD]]
|
||||
define amdgpu_kernel void @smrd_valu2_salu_user(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in, i32 %a) #1 {
|
||||
define amdgpu_kernel void @smrd_valu2_salu_user(i32 addrspace(1)* %out, [8 x i32] addrspace(4)* %in, i32 %a) #1 {
|
||||
entry:
|
||||
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp1 = add i32 %tmp, 4
|
||||
%tmp2 = getelementptr [8 x i32], [8 x i32] addrspace(2)* %in, i32 %tmp, i32 4
|
||||
%tmp3 = load i32, i32 addrspace(2)* %tmp2
|
||||
%tmp2 = getelementptr [8 x i32], [8 x i32] addrspace(4)* %in, i32 %tmp, i32 4
|
||||
%tmp3 = load i32, i32 addrspace(4)* %tmp2
|
||||
%tmp4 = add i32 %tmp3, %a
|
||||
store i32 %tmp4, i32 addrspace(1)* %out
|
||||
ret void
|
||||
@ -261,12 +261,12 @@ entry:
|
||||
; GCN-LABEL: {{^}}smrd_valu2_max_smrd_offset:
|
||||
; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1020{{$}}
|
||||
; GCN-HSA flat_load_dword v{{[0-9]}}, v{{[0-9]+:[0-9]+}}
|
||||
define amdgpu_kernel void @smrd_valu2_max_smrd_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 {
|
||||
define amdgpu_kernel void @smrd_valu2_max_smrd_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(4)* %in) #1 {
|
||||
entry:
|
||||
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp1 = add i32 %tmp, 4
|
||||
%tmp2 = getelementptr [1024 x i32], [1024 x i32] addrspace(2)* %in, i32 %tmp, i32 255
|
||||
%tmp3 = load i32, i32 addrspace(2)* %tmp2
|
||||
%tmp2 = getelementptr [1024 x i32], [1024 x i32] addrspace(4)* %in, i32 %tmp, i32 255
|
||||
%tmp3 = load i32, i32 addrspace(4)* %tmp2
|
||||
store i32 %tmp3, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -275,12 +275,12 @@ entry:
|
||||
; GCN-NOHSA-NOT: v_add
|
||||
; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1024{{$}}
|
||||
; GCN-HSA: flat_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}]
|
||||
define amdgpu_kernel void @smrd_valu2_mubuf_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 {
|
||||
define amdgpu_kernel void @smrd_valu2_mubuf_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(4)* %in) #1 {
|
||||
entry:
|
||||
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp1 = add i32 %tmp, 4
|
||||
%tmp2 = getelementptr [1024 x i32], [1024 x i32] addrspace(2)* %in, i32 %tmp, i32 256
|
||||
%tmp3 = load i32, i32 addrspace(2)* %tmp2
|
||||
%tmp2 = getelementptr [1024 x i32], [1024 x i32] addrspace(4)* %in, i32 %tmp, i32 256
|
||||
%tmp3 = load i32, i32 addrspace(4)* %tmp2
|
||||
store i32 %tmp3, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -290,12 +290,12 @@ entry:
|
||||
; GCN-NOHSA: buffer_load_dwordx4
|
||||
; GCN-HSA: flat_load_dwordx4
|
||||
; GCN-HSA: flat_load_dwordx4
|
||||
define amdgpu_kernel void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
|
||||
define amdgpu_kernel void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(4)* nocapture readonly %in) #1 {
|
||||
entry:
|
||||
%tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
|
||||
%tmp2 = bitcast i32 addrspace(2)* %tmp1 to <8 x i32> addrspace(2)*
|
||||
%tmp3 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp2, align 4
|
||||
%tmp1 = getelementptr inbounds i32, i32 addrspace(4)* %in, i32 %tmp0
|
||||
%tmp2 = bitcast i32 addrspace(4)* %tmp1 to <8 x i32> addrspace(4)*
|
||||
%tmp3 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp2, align 4
|
||||
store <8 x i32> %tmp3, <8 x i32> addrspace(1)* %out, align 32
|
||||
ret void
|
||||
}
|
||||
@ -313,12 +313,12 @@ entry:
|
||||
; GCN-NOHSA: buffer_store_dword
|
||||
; GCN-HSA: flat_load_dwordx4
|
||||
; GCN-HSA: flat_load_dwordx4
|
||||
define amdgpu_kernel void @s_load_imm_v8i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
|
||||
define amdgpu_kernel void @s_load_imm_v8i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(4)* nocapture readonly %in) #1 {
|
||||
entry:
|
||||
%tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
|
||||
%tmp2 = bitcast i32 addrspace(2)* %tmp1 to <8 x i32> addrspace(2)*
|
||||
%tmp3 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp2, align 4
|
||||
%tmp1 = getelementptr inbounds i32, i32 addrspace(4)* %in, i32 %tmp0
|
||||
%tmp2 = bitcast i32 addrspace(4)* %tmp1 to <8 x i32> addrspace(4)*
|
||||
%tmp3 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp2, align 4
|
||||
|
||||
%elt0 = extractelement <8 x i32> %tmp3, i32 0
|
||||
%elt1 = extractelement <8 x i32> %tmp3, i32 1
|
||||
@ -350,12 +350,12 @@ entry:
|
||||
; GCN-HSA: flat_load_dwordx4
|
||||
; GCN-HSA: flat_load_dwordx4
|
||||
; GCN-HSA: flat_load_dwordx4
|
||||
define amdgpu_kernel void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
|
||||
define amdgpu_kernel void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(4)* nocapture readonly %in) #1 {
|
||||
entry:
|
||||
%tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
|
||||
%tmp2 = bitcast i32 addrspace(2)* %tmp1 to <16 x i32> addrspace(2)*
|
||||
%tmp3 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp2, align 4
|
||||
%tmp1 = getelementptr inbounds i32, i32 addrspace(4)* %in, i32 %tmp0
|
||||
%tmp2 = bitcast i32 addrspace(4)* %tmp1 to <16 x i32> addrspace(4)*
|
||||
%tmp3 = load <16 x i32>, <16 x i32> addrspace(4)* %tmp2, align 4
|
||||
store <16 x i32> %tmp3, <16 x i32> addrspace(1)* %out, align 32
|
||||
ret void
|
||||
}
|
||||
@ -385,12 +385,12 @@ entry:
|
||||
; GCN-HSA: flat_load_dwordx4
|
||||
; GCN-HSA: flat_load_dwordx4
|
||||
; GCN-HSA: flat_load_dwordx4
|
||||
define amdgpu_kernel void @s_load_imm_v16i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
|
||||
define amdgpu_kernel void @s_load_imm_v16i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(4)* nocapture readonly %in) #1 {
|
||||
entry:
|
||||
%tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
|
||||
%tmp2 = bitcast i32 addrspace(2)* %tmp1 to <16 x i32> addrspace(2)*
|
||||
%tmp3 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp2, align 4
|
||||
%tmp1 = getelementptr inbounds i32, i32 addrspace(4)* %in, i32 %tmp0
|
||||
%tmp2 = bitcast i32 addrspace(4)* %tmp1 to <16 x i32> addrspace(4)*
|
||||
%tmp3 = load <16 x i32>, <16 x i32> addrspace(4)* %tmp2, align 4
|
||||
|
||||
%elt0 = extractelement <16 x i32> %tmp3, i32 0
|
||||
%elt1 = extractelement <16 x i32> %tmp3, i32 1
|
||||
|
@ -15,11 +15,11 @@
|
||||
bb:
|
||||
%0 = getelementptr i32, i32 addrspace(1)* %arg1, i64 0, !amdgpu.uniform !3, !amdgpu.noclobber !3
|
||||
%tmp5 = alloca %struct.wombat, align 16, addrspace(5)
|
||||
%1 = call noalias nonnull dereferenceable(64) i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
|
||||
%2 = bitcast i8 addrspace(2)* %1 to i32 addrspace(2)*
|
||||
%3 = getelementptr inbounds i32, i32 addrspace(2)* %2, i64 1
|
||||
%4 = bitcast i32 addrspace(2)* %3 to <2 x i32> addrspace(2)*, !amdgpu.uniform !3, !amdgpu.noclobber !3
|
||||
%5 = load <2 x i32>, <2 x i32> addrspace(2)* %4, align 4, !invariant.load !3
|
||||
%1 = call noalias nonnull dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
|
||||
%2 = bitcast i8 addrspace(4)* %1 to i32 addrspace(4)*
|
||||
%3 = getelementptr inbounds i32, i32 addrspace(4)* %2, i64 1
|
||||
%4 = bitcast i32 addrspace(4)* %3 to <2 x i32> addrspace(4)*, !amdgpu.uniform !3, !amdgpu.noclobber !3
|
||||
%5 = load <2 x i32>, <2 x i32> addrspace(4)* %4, align 4, !invariant.load !3
|
||||
%6 = extractelement <2 x i32> %5, i32 0
|
||||
%7 = extractelement <2 x i32> %5, i32 1
|
||||
%8 = lshr i32 %6, 16
|
||||
@ -32,7 +32,7 @@
|
||||
%15 = add i32 %13, %14
|
||||
%16 = add i32 %15, %11
|
||||
%17 = getelementptr inbounds [256 x [16 x i8]], [256 x [16 x i8]] addrspace(3)* @sched_dbg_value_crash.tmp6, i32 0, i32 %16
|
||||
%tmp7 = load i64, i64 addrspace(2)* null, align 536870912
|
||||
%tmp7 = load i64, i64 addrspace(4)* null, align 536870912
|
||||
%tmp8 = tail call i32 @llvm.amdgcn.workitem.id.x() #3, !range !4
|
||||
%tmp9 = zext i32 %tmp8 to i64
|
||||
%tmp10 = add i64 %tmp7, %tmp9
|
||||
@ -141,7 +141,7 @@
|
||||
declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #1
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
|
||||
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #1
|
||||
declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #1
|
||||
declare i32 @llvm.amdgcn.workitem.id.y() #1
|
||||
declare i32 @llvm.amdgcn.workitem.id.z() #1
|
||||
declare void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)* nocapture writeonly, i8 addrspace(5)* nocapture readonly, i64, i32, i1) #0
|
||||
@ -199,9 +199,9 @@ body: |
|
||||
%2:vgpr_32 = COPY $vgpr2
|
||||
%1:vgpr_32 = COPY $vgpr1
|
||||
%0:vgpr_32 = COPY $vgpr0
|
||||
%5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
|
||||
%6:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 8, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
|
||||
%7:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 16, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
|
||||
%5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
|
||||
%6:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 8, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
|
||||
%7:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 16, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
|
||||
%8:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 24, 0
|
||||
%9:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 32, 0
|
||||
%10:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 4, 0
|
||||
|
@ -528,8 +528,8 @@ define amdgpu_kernel void @v_sext_in_reg_i32_to_i64_move_use(i64 addrspace(1)* %
|
||||
; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 15
|
||||
; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX89: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 15
|
||||
define amdgpu_kernel void @s_sext_in_reg_i1_i16(i16 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
|
||||
%ld = load i32, i32 addrspace(2)* %ptr
|
||||
define amdgpu_kernel void @s_sext_in_reg_i1_i16(i16 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 {
|
||||
%ld = load i32, i32 addrspace(4)* %ptr
|
||||
%in = trunc i32 %ld to i16
|
||||
%shl = shl i16 %in, 15
|
||||
%sext = ashr i16 %shl, 15
|
||||
@ -547,8 +547,8 @@ define amdgpu_kernel void @s_sext_in_reg_i1_i16(i16 addrspace(1)* %out, i32 addr
|
||||
; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14
|
||||
; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX89: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14
|
||||
define amdgpu_kernel void @s_sext_in_reg_i2_i16(i16 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
|
||||
%ld = load i32, i32 addrspace(2)* %ptr
|
||||
define amdgpu_kernel void @s_sext_in_reg_i2_i16(i16 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 {
|
||||
%ld = load i32, i32 addrspace(4)* %ptr
|
||||
%in = trunc i32 %ld to i16
|
||||
%shl = shl i16 %in, 14
|
||||
%sext = ashr i16 %shl, 14
|
||||
|
@ -4,10 +4,10 @@
|
||||
; CHECK-LABEL: {{^}}phi1:
|
||||
; CHECK: s_buffer_load_dword [[DST:s[0-9]]], {{s\[[0-9]+:[0-9]+\]}}, 0x0
|
||||
; CHECK: v_mov_b32_e32 v{{[0-9]}}, [[DST]]
|
||||
define amdgpu_ps void @phi1(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
define amdgpu_ps void @phi1(<4 x i32> addrspace(4)* inreg %arg, <4 x i32> addrspace(4)* inreg %arg1, <8 x i32> addrspace(4)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
main_body:
|
||||
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
|
||||
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
|
||||
%tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0
|
||||
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 0)
|
||||
%tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16)
|
||||
%tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 32)
|
||||
@ -28,10 +28,10 @@ ENDIF: ; preds = %ELSE, %main_body
|
||||
|
||||
; Make sure this program doesn't crash
|
||||
; CHECK-LABEL: {{^}}phi2:
|
||||
define amdgpu_ps void @phi2(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #1 {
|
||||
define amdgpu_ps void @phi2(<4 x i32> addrspace(4)* inreg %arg, <4 x i32> addrspace(4)* inreg %arg1, <8 x i32> addrspace(4)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #1 {
|
||||
main_body:
|
||||
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
|
||||
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
|
||||
%tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0
|
||||
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16)
|
||||
%tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 32)
|
||||
%tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 36)
|
||||
@ -47,10 +47,10 @@ main_body:
|
||||
%tmp33 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 84)
|
||||
%tmp34 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 88)
|
||||
%tmp35 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 92)
|
||||
%tmp36 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %arg2, i32 0
|
||||
%tmp37 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp36, !tbaa !0
|
||||
%tmp38 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg1, i32 0
|
||||
%tmp39 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp38, !tbaa !0
|
||||
%tmp36 = getelementptr <8 x i32>, <8 x i32> addrspace(4)* %arg2, i32 0
|
||||
%tmp37 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp36, !tbaa !0
|
||||
%tmp38 = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg1, i32 0
|
||||
%tmp39 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp38, !tbaa !0
|
||||
%i.i = extractelement <2 x i32> %arg5, i32 0
|
||||
%j.i = extractelement <2 x i32> %arg5, i32 1
|
||||
%i.f.i = bitcast i32 %i.i to float
|
||||
@ -173,10 +173,10 @@ ENDIF24: ; preds = %IF25, %ENDIF
|
||||
|
||||
; We just want ot make sure the program doesn't crash
|
||||
; CHECK-LABEL: {{^}}loop:
|
||||
define amdgpu_ps void @loop(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
define amdgpu_ps void @loop(<4 x i32> addrspace(4)* inreg %arg, <4 x i32> addrspace(4)* inreg %arg1, <8 x i32> addrspace(4)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
main_body:
|
||||
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
|
||||
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
|
||||
%tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0
|
||||
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 0)
|
||||
%tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 4)
|
||||
%tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 8)
|
||||
@ -226,15 +226,15 @@ ENDIF: ; preds = %LOOP
|
||||
; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[SAMPLE_LO]]:[[SAMPLE_HI]]{{\]}}
|
||||
; CHECK: exp
|
||||
; CHECK: s_endpgm
|
||||
define amdgpu_ps void @sample_v3([17 x <4 x i32>] addrspace(2)* byval %arg, [32 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
|
||||
define amdgpu_ps void @sample_v3([17 x <4 x i32>] addrspace(4)* byval %arg, [32 x <4 x i32>] addrspace(4)* byval %arg1, [16 x <8 x i32>] addrspace(4)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
|
||||
entry:
|
||||
%tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg, i64 0, i32 0
|
||||
%tmp21 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
|
||||
%tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg, i64 0, i32 0
|
||||
%tmp21 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0
|
||||
%tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 16)
|
||||
%tmp23 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 0
|
||||
%tmp24 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp23, !tbaa !0
|
||||
%tmp25 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 0
|
||||
%tmp26 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp25, !tbaa !0
|
||||
%tmp23 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 0
|
||||
%tmp24 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp23, !tbaa !0
|
||||
%tmp25 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 0
|
||||
%tmp26 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp25, !tbaa !0
|
||||
%tmp27 = fcmp oeq float %tmp22, 0.000000e+00
|
||||
%tmp26.bc = bitcast <4 x i32> %tmp26 to <4 x i32>
|
||||
br i1 %tmp27, label %if, label %else
|
||||
@ -290,7 +290,7 @@ endif: ; preds = %if1, %if0, %entry
|
||||
; This test is just checking that we don't crash / assertion fail.
|
||||
; CHECK-LABEL: {{^}}copy2:
|
||||
; CHECK: s_endpgm
|
||||
define amdgpu_ps void @copy2([17 x <4 x i32>] addrspace(2)* byval %arg, [32 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
|
||||
define amdgpu_ps void @copy2([17 x <4 x i32>] addrspace(4)* byval %arg, [32 x <4 x i32>] addrspace(4)* byval %arg1, [16 x <8 x i32>] addrspace(4)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
|
||||
entry:
|
||||
br label %LOOP68
|
||||
|
||||
@ -326,15 +326,15 @@ ENDIF69: ; preds = %LOOP68
|
||||
; [[END]]:
|
||||
; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[ADD]]{{\]}}
|
||||
; CHECK: s_endpgm
|
||||
define amdgpu_ps void @sample_rsrc([6 x <4 x i32>] addrspace(2)* byval %arg, [17 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
|
||||
define amdgpu_ps void @sample_rsrc([6 x <4 x i32>] addrspace(4)* byval %arg, [17 x <4 x i32>] addrspace(4)* byval %arg1, [16 x <4 x i32>] addrspace(4)* byval %arg2, [32 x <8 x i32>] addrspace(4)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
|
||||
bb:
|
||||
%tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg1, i32 0, i32 0
|
||||
%tmp22 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !3
|
||||
%tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg1, i32 0, i32 0
|
||||
%tmp22 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !3
|
||||
%tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp22, i32 16)
|
||||
%tmp25 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %arg3, i32 0, i32 0
|
||||
%tmp26 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp25, !tbaa !3
|
||||
%tmp27 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %arg2, i32 0, i32 0
|
||||
%tmp28 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp27, !tbaa !3
|
||||
%tmp25 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(4)* %arg3, i32 0, i32 0
|
||||
%tmp26 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp25, !tbaa !3
|
||||
%tmp27 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(4)* %arg2, i32 0, i32 0
|
||||
%tmp28 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp27, !tbaa !3
|
||||
%i.i = extractelement <2 x i32> %arg7, i32 0
|
||||
%j.i = extractelement <2 x i32> %arg7, i32 1
|
||||
%i.f.i = bitcast i32 %i.i to float
|
||||
@ -382,11 +382,11 @@ bb71: ; preds = %bb80, %bb38
|
||||
; Check the resource descriptor is stored in an sgpr.
|
||||
; CHECK-LABEL: {{^}}mimg_srsrc_sgpr:
|
||||
; CHECK: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
|
||||
define amdgpu_ps void @mimg_srsrc_sgpr([34 x <8 x i32>] addrspace(2)* byval %arg) #0 {
|
||||
define amdgpu_ps void @mimg_srsrc_sgpr([34 x <8 x i32>] addrspace(4)* byval %arg) #0 {
|
||||
bb:
|
||||
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
|
||||
%tmp7 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %arg, i32 0, i32 %tid
|
||||
%tmp8 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp7, align 32, !tbaa !0
|
||||
%tmp7 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(4)* %arg, i32 0, i32 %tid
|
||||
%tmp8 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp7, align 32, !tbaa !0
|
||||
%tmp = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float 7.500000e-01, float 2.500000e-01>, <8 x i32> %tmp8, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp10 = extractelement <4 x float> %tmp, i32 0
|
||||
%tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float %tmp10)
|
||||
@ -397,11 +397,11 @@ bb:
|
||||
; Check the sampler is stored in an sgpr.
|
||||
; CHECK-LABEL: {{^}}mimg_ssamp_sgpr:
|
||||
; CHECK: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
|
||||
define amdgpu_ps void @mimg_ssamp_sgpr([17 x <4 x i32>] addrspace(2)* byval %arg) #0 {
|
||||
define amdgpu_ps void @mimg_ssamp_sgpr([17 x <4 x i32>] addrspace(4)* byval %arg) #0 {
|
||||
bb:
|
||||
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
|
||||
%tmp7 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg, i32 0, i32 %tid
|
||||
%tmp8 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp7, align 16, !tbaa !0
|
||||
%tmp7 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg, i32 0, i32 %tid
|
||||
%tmp8 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp7, align 16, !tbaa !0
|
||||
%tmp = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float 7.500000e-01, float 2.500000e-01>, <8 x i32> undef, <4 x i32> %tmp8, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp10 = extractelement <4 x float> %tmp, i32 0
|
||||
%tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp10, float undef)
|
||||
|
@ -6,15 +6,15 @@
|
||||
|
||||
; GCN-LABEL: {{^}}main:
|
||||
; GCN: image_sample_b v{{\[[0-9]:[0-9]\]}}, v{{\[[0-9]:[0-9]\]}}, s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf
|
||||
define amdgpu_ps void @main(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
define amdgpu_ps void @main(<4 x i32> addrspace(4)* inreg %arg, <4 x i32> addrspace(4)* inreg %arg1, <8 x i32> addrspace(4)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
main_body:
|
||||
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
|
||||
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
|
||||
%tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0
|
||||
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16)
|
||||
%tmp22 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %arg2, i32 0
|
||||
%tmp23 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp22, !tbaa !0
|
||||
%tmp24 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg1, i32 0
|
||||
%tmp25 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp24, !tbaa !0
|
||||
%tmp22 = getelementptr <8 x i32>, <8 x i32> addrspace(4)* %arg2, i32 0
|
||||
%tmp23 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp22, !tbaa !0
|
||||
%tmp24 = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg1, i32 0
|
||||
%tmp25 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp24, !tbaa !0
|
||||
%i.i = extractelement <2 x i32> %arg5, i32 0
|
||||
%j.i = extractelement <2 x i32> %arg5, i32 1
|
||||
%i.f.i = bitcast i32 %i.i to float
|
||||
|
@ -16,12 +16,12 @@
|
||||
; CHECK: s_waitcnt vmcnt(0)
|
||||
; CHECK: exp
|
||||
; CHECK: s_endpgm
|
||||
define amdgpu_ps void @main([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
|
||||
define amdgpu_ps void @main([6 x <16 x i8>] addrspace(4)* byval %arg, [17 x <16 x i8>] addrspace(4)* byval %arg1, [17 x <4 x i32>] addrspace(4)* byval %arg2, [34 x <8 x i32>] addrspace(4)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
|
||||
main_body:
|
||||
%tmp = bitcast [34 x <8 x i32>] addrspace(2)* %arg3 to <32 x i8> addrspace(2)*
|
||||
%tmp22 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp, align 32, !tbaa !0
|
||||
%tmp23 = bitcast [17 x <4 x i32>] addrspace(2)* %arg2 to <16 x i8> addrspace(2)*
|
||||
%tmp24 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp23, align 16, !tbaa !0
|
||||
%tmp = bitcast [34 x <8 x i32>] addrspace(4)* %arg3 to <32 x i8> addrspace(4)*
|
||||
%tmp22 = load <32 x i8>, <32 x i8> addrspace(4)* %tmp, align 32, !tbaa !0
|
||||
%tmp23 = bitcast [17 x <4 x i32>] addrspace(4)* %arg2 to <16 x i8> addrspace(4)*
|
||||
%tmp24 = load <16 x i8>, <16 x i8> addrspace(4)* %tmp23, align 16, !tbaa !0
|
||||
%i.i = extractelement <2 x i32> %arg11, i32 0
|
||||
%j.i = extractelement <2 x i32> %arg11, i32 1
|
||||
%i.f.i = bitcast i32 %i.i to float
|
||||
|
@ -24,10 +24,10 @@
|
||||
; GCN: s_endpgm
|
||||
|
||||
; TOVGPR: ScratchSize: 0{{$}}
|
||||
define amdgpu_ps void @main([17 x <4 x i32>] addrspace(2)* byval %arg, [32 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) {
|
||||
define amdgpu_ps void @main([17 x <4 x i32>] addrspace(4)* byval %arg, [32 x <4 x i32>] addrspace(4)* byval %arg1, [16 x <8 x i32>] addrspace(4)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) {
|
||||
main_body:
|
||||
%tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg, i64 0, i32 0
|
||||
%tmp21 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
|
||||
%tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg, i64 0, i32 0
|
||||
%tmp21 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0
|
||||
%tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 96)
|
||||
%tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 100)
|
||||
%tmp24 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 104)
|
||||
@ -66,39 +66,39 @@ main_body:
|
||||
%tmp57 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 372)
|
||||
%tmp58 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 376)
|
||||
%tmp59 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 384)
|
||||
%tmp60 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 0
|
||||
%tmp61 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp60, !tbaa !0
|
||||
%tmp62 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 0
|
||||
%tmp63 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp62, !tbaa !0
|
||||
%tmp60 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 0
|
||||
%tmp61 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp60, !tbaa !0
|
||||
%tmp62 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 0
|
||||
%tmp63 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp62, !tbaa !0
|
||||
%tmp63.bc = bitcast <4 x i32> %tmp63 to <4 x i32>
|
||||
%tmp64 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 1
|
||||
%tmp65 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp64, !tbaa !0
|
||||
%tmp66 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 1
|
||||
%tmp67 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp66, !tbaa !0
|
||||
%tmp68 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 2
|
||||
%tmp69 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp68, !tbaa !0
|
||||
%tmp70 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 2
|
||||
%tmp71 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp70, !tbaa !0
|
||||
%tmp72 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 3
|
||||
%tmp73 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp72, !tbaa !0
|
||||
%tmp74 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 3
|
||||
%tmp75 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp74, !tbaa !0
|
||||
%tmp76 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 4
|
||||
%tmp77 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp76, !tbaa !0
|
||||
%tmp78 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 4
|
||||
%tmp79 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp78, !tbaa !0
|
||||
%tmp80 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 5
|
||||
%tmp81 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp80, !tbaa !0
|
||||
%tmp82 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 5
|
||||
%tmp83 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp82, !tbaa !0
|
||||
%tmp84 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 6
|
||||
%tmp85 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp84, !tbaa !0
|
||||
%tmp86 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 6
|
||||
%tmp87 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp86, !tbaa !0
|
||||
%tmp88 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 7
|
||||
%tmp89 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp88, !tbaa !0
|
||||
%tmp90 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 7
|
||||
%tmp91 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp90, !tbaa !0
|
||||
%tmp64 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 1
|
||||
%tmp65 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp64, !tbaa !0
|
||||
%tmp66 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 1
|
||||
%tmp67 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp66, !tbaa !0
|
||||
%tmp68 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 2
|
||||
%tmp69 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp68, !tbaa !0
|
||||
%tmp70 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 2
|
||||
%tmp71 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp70, !tbaa !0
|
||||
%tmp72 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 3
|
||||
%tmp73 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp72, !tbaa !0
|
||||
%tmp74 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 3
|
||||
%tmp75 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp74, !tbaa !0
|
||||
%tmp76 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 4
|
||||
%tmp77 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp76, !tbaa !0
|
||||
%tmp78 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 4
|
||||
%tmp79 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp78, !tbaa !0
|
||||
%tmp80 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 5
|
||||
%tmp81 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp80, !tbaa !0
|
||||
%tmp82 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 5
|
||||
%tmp83 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp82, !tbaa !0
|
||||
%tmp84 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 6
|
||||
%tmp85 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp84, !tbaa !0
|
||||
%tmp86 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 6
|
||||
%tmp87 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp86, !tbaa !0
|
||||
%tmp88 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 7
|
||||
%tmp89 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp88, !tbaa !0
|
||||
%tmp90 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 7
|
||||
%tmp91 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp90, !tbaa !0
|
||||
%i.i = extractelement <2 x i32> %arg6, i32 0
|
||||
%j.i = extractelement <2 x i32> %arg6, i32 1
|
||||
%i.f.i = bitcast i32 %i.i to float
|
||||
@ -778,10 +778,10 @@ ENDIF66: ; preds = %LOOP65
|
||||
; GCN-LABEL: {{^}}main1:
|
||||
; GCN: s_endpgm
|
||||
; TOVGPR: ScratchSize: 0{{$}}
|
||||
define amdgpu_ps void @main1([17 x <4 x i32>] addrspace(2)* byval %arg, [32 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
|
||||
define amdgpu_ps void @main1([17 x <4 x i32>] addrspace(4)* byval %arg, [32 x <4 x i32>] addrspace(4)* byval %arg1, [16 x <8 x i32>] addrspace(4)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
|
||||
main_body:
|
||||
%tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg, i64 0, i32 0
|
||||
%tmp21 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
|
||||
%tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg, i64 0, i32 0
|
||||
%tmp21 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0
|
||||
%tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 0)
|
||||
%tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 4)
|
||||
%tmp24 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 8)
|
||||
@ -885,42 +885,42 @@ main_body:
|
||||
%tmp122 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 716)
|
||||
%tmp123 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 864)
|
||||
%tmp124 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 868)
|
||||
%tmp125 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 0
|
||||
%tmp126 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp125, !tbaa !0
|
||||
%tmp127 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 0
|
||||
%tmp128 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp127, !tbaa !0
|
||||
%tmp129 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 1
|
||||
%tmp130 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp129, !tbaa !0
|
||||
%tmp131 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 1
|
||||
%tmp132 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp131, !tbaa !0
|
||||
%tmp133 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 2
|
||||
%tmp134 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp133, !tbaa !0
|
||||
%tmp135 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 2
|
||||
%tmp136 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp135, !tbaa !0
|
||||
%tmp137 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 3
|
||||
%tmp138 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp137, !tbaa !0
|
||||
%tmp139 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 3
|
||||
%tmp140 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp139, !tbaa !0
|
||||
%tmp141 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 4
|
||||
%tmp142 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp141, !tbaa !0
|
||||
%tmp143 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 4
|
||||
%tmp144 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp143, !tbaa !0
|
||||
%tmp145 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 5
|
||||
%tmp146 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp145, !tbaa !0
|
||||
%tmp147 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 5
|
||||
%tmp148 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp147, !tbaa !0
|
||||
%tmp149 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 6
|
||||
%tmp150 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp149, !tbaa !0
|
||||
%tmp151 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 6
|
||||
%tmp152 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp151, !tbaa !0
|
||||
%tmp153 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 7
|
||||
%tmp154 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp153, !tbaa !0
|
||||
%tmp155 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 7
|
||||
%tmp156 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp155, !tbaa !0
|
||||
%tmp157 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 8
|
||||
%tmp158 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp157, !tbaa !0
|
||||
%tmp159 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 8
|
||||
%tmp160 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp159, !tbaa !0
|
||||
%tmp125 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 0
|
||||
%tmp126 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp125, !tbaa !0
|
||||
%tmp127 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 0
|
||||
%tmp128 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp127, !tbaa !0
|
||||
%tmp129 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 1
|
||||
%tmp130 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp129, !tbaa !0
|
||||
%tmp131 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 1
|
||||
%tmp132 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp131, !tbaa !0
|
||||
%tmp133 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 2
|
||||
%tmp134 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp133, !tbaa !0
|
||||
%tmp135 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 2
|
||||
%tmp136 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp135, !tbaa !0
|
||||
%tmp137 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 3
|
||||
%tmp138 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp137, !tbaa !0
|
||||
%tmp139 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 3
|
||||
%tmp140 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp139, !tbaa !0
|
||||
%tmp141 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 4
|
||||
%tmp142 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp141, !tbaa !0
|
||||
%tmp143 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 4
|
||||
%tmp144 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp143, !tbaa !0
|
||||
%tmp145 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 5
|
||||
%tmp146 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp145, !tbaa !0
|
||||
%tmp147 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 5
|
||||
%tmp148 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp147, !tbaa !0
|
||||
%tmp149 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 6
|
||||
%tmp150 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp149, !tbaa !0
|
||||
%tmp151 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 6
|
||||
%tmp152 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp151, !tbaa !0
|
||||
%tmp153 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 7
|
||||
%tmp154 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp153, !tbaa !0
|
||||
%tmp155 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 7
|
||||
%tmp156 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp155, !tbaa !0
|
||||
%tmp157 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 8
|
||||
%tmp158 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp157, !tbaa !0
|
||||
%tmp159 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 8
|
||||
%tmp160 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp159, !tbaa !0
|
||||
%tmp161 = fcmp ugt float %arg17, 0.000000e+00
|
||||
%tmp162 = select i1 %tmp161, float 1.000000e+00, float 0.000000e+00
|
||||
%i.i = extractelement <2 x i32> %arg6, i32 0
|
||||
|
@ -8,7 +8,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() #2
|
||||
|
||||
|
||||
@stored_lds_ptr = addrspace(3) global i32 addrspace(3)* undef, align 4
|
||||
@stored_constant_ptr = addrspace(3) global i32 addrspace(2)* undef, align 8
|
||||
@stored_constant_ptr = addrspace(3) global i32 addrspace(4)* undef, align 8
|
||||
@stored_global_ptr = addrspace(3) global i32 addrspace(1)* undef, align 8
|
||||
|
||||
; GCN-LABEL: {{^}}reorder_local_load_global_store_local_load:
|
||||
@ -100,14 +100,14 @@ define amdgpu_kernel void @no_reorder_barrier_local_load_global_store_local_load
|
||||
; CI: buffer_store_dword
|
||||
; GFX9: global_store_dword
|
||||
define amdgpu_kernel void @reorder_constant_load_global_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
|
||||
%ptr0 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8
|
||||
%ptr0 = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(3)* @stored_constant_ptr, align 8
|
||||
|
||||
%ptr1 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 1
|
||||
%ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 3
|
||||
%ptr1 = getelementptr inbounds i32, i32 addrspace(4)* %ptr0, i64 1
|
||||
%ptr2 = getelementptr inbounds i32, i32 addrspace(4)* %ptr0, i64 3
|
||||
|
||||
%tmp1 = load i32, i32 addrspace(2)* %ptr1, align 4
|
||||
%tmp1 = load i32, i32 addrspace(4)* %ptr1, align 4
|
||||
store i32 99, i32 addrspace(1)* %gptr, align 4
|
||||
%tmp2 = load i32, i32 addrspace(2)* %ptr2, align 4
|
||||
%tmp2 = load i32, i32 addrspace(4)* %ptr2, align 4
|
||||
|
||||
%add = add nsw i32 %tmp1, %tmp2
|
||||
|
||||
@ -129,14 +129,14 @@ define amdgpu_kernel void @reorder_constant_load_global_store_constant_load(i32
|
||||
; CI: buffer_store_dword
|
||||
; GFX9: global_store_dword
|
||||
define amdgpu_kernel void @reorder_constant_load_local_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(3)* %lptr) #0 {
|
||||
%ptr0 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8
|
||||
%ptr0 = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(3)* @stored_constant_ptr, align 8
|
||||
|
||||
%ptr1 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 1
|
||||
%ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 3
|
||||
%ptr1 = getelementptr inbounds i32, i32 addrspace(4)* %ptr0, i64 1
|
||||
%ptr2 = getelementptr inbounds i32, i32 addrspace(4)* %ptr0, i64 3
|
||||
|
||||
%tmp1 = load i32, i32 addrspace(2)* %ptr1, align 4
|
||||
%tmp1 = load i32, i32 addrspace(4)* %ptr1, align 4
|
||||
store i32 99, i32 addrspace(3)* %lptr, align 4
|
||||
%tmp2 = load i32, i32 addrspace(2)* %ptr2, align 4
|
||||
%tmp2 = load i32, i32 addrspace(4)* %ptr2, align 4
|
||||
|
||||
%add = add nsw i32 %tmp1, %tmp2
|
||||
|
||||
@ -151,13 +151,13 @@ define amdgpu_kernel void @reorder_constant_load_local_store_constant_load(i32 a
|
||||
; GCN: ds_write_b32
|
||||
; CI: buffer_store_dword
|
||||
; GFX9: global_store_dword
|
||||
define amdgpu_kernel void @reorder_smrd_load_local_store_smrd_load(i32 addrspace(1)* %out, i32 addrspace(3)* noalias %lptr, i32 addrspace(2)* %ptr0) #0 {
|
||||
%ptr1 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 1
|
||||
%ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 2
|
||||
define amdgpu_kernel void @reorder_smrd_load_local_store_smrd_load(i32 addrspace(1)* %out, i32 addrspace(3)* noalias %lptr, i32 addrspace(4)* %ptr0) #0 {
|
||||
%ptr1 = getelementptr inbounds i32, i32 addrspace(4)* %ptr0, i64 1
|
||||
%ptr2 = getelementptr inbounds i32, i32 addrspace(4)* %ptr0, i64 2
|
||||
|
||||
%tmp1 = load i32, i32 addrspace(2)* %ptr1, align 4
|
||||
%tmp1 = load i32, i32 addrspace(4)* %ptr1, align 4
|
||||
store i32 99, i32 addrspace(3)* %lptr, align 4
|
||||
%tmp2 = load i32, i32 addrspace(2)* %ptr2, align 4
|
||||
%tmp2 = load i32, i32 addrspace(4)* %ptr2, align 4
|
||||
|
||||
%add = add nsw i32 %tmp1, %tmp2
|
||||
|
||||
|
@ -12,10 +12,10 @@
|
||||
; GCN: buffer_store_dword
|
||||
; GCN: [[EXIT]]:
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @vccz_workaround(i32 addrspace(2)* %in, i32 addrspace(1)* %out, float %cond) {
|
||||
define amdgpu_kernel void @vccz_workaround(i32 addrspace(4)* %in, i32 addrspace(1)* %out, float %cond) {
|
||||
entry:
|
||||
%cnd = fcmp oeq float 0.0, %cond
|
||||
%sgpr = load volatile i32, i32 addrspace(2)* %in
|
||||
%sgpr = load volatile i32, i32 addrspace(4)* %in
|
||||
br i1 %cnd, label %if, label %endif
|
||||
|
||||
if:
|
||||
|
@ -7,10 +7,10 @@
|
||||
; GCN-LABEL: {{^}}smrd0:
|
||||
; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
|
||||
; VIGFX9: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4
|
||||
define amdgpu_kernel void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
|
||||
define amdgpu_kernel void @smrd0(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 {
|
||||
entry:
|
||||
%tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 1
|
||||
%tmp1 = load i32, i32 addrspace(2)* %tmp
|
||||
%tmp = getelementptr i32, i32 addrspace(4)* %ptr, i64 1
|
||||
%tmp1 = load i32, i32 addrspace(4)* %tmp
|
||||
store i32 %tmp1, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -19,10 +19,10 @@ entry:
|
||||
; GCN-LABEL: {{^}}smrd1:
|
||||
; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff,0x{{[0-9]+[137]}}
|
||||
; VIGFX9: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
|
||||
define amdgpu_kernel void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
|
||||
define amdgpu_kernel void @smrd1(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 {
|
||||
entry:
|
||||
%tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 255
|
||||
%tmp1 = load i32, i32 addrspace(2)* %tmp
|
||||
%tmp = getelementptr i32, i32 addrspace(4)* %ptr, i64 255
|
||||
%tmp1 = load i32, i32 addrspace(4)* %tmp
|
||||
store i32 %tmp1, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -34,10 +34,10 @@ entry:
|
||||
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
|
||||
; VIGFX9: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
|
||||
define amdgpu_kernel void @smrd2(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 {
|
||||
entry:
|
||||
%tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 256
|
||||
%tmp1 = load i32, i32 addrspace(2)* %tmp
|
||||
%tmp = getelementptr i32, i32 addrspace(4)* %ptr, i64 256
|
||||
%tmp1 = load i32, i32 addrspace(4)* %tmp
|
||||
store i32 %tmp1, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -49,10 +49,10 @@ entry:
|
||||
; SI: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0xb ; encoding: [0x0b
|
||||
; TODO: Add VI checks
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
|
||||
define amdgpu_kernel void @smrd3(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 {
|
||||
entry:
|
||||
%tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 4294967296
|
||||
%tmp1 = load i32, i32 addrspace(2)* %tmp
|
||||
%tmp = getelementptr i32, i32 addrspace(4)* %ptr, i64 4294967296
|
||||
%tmp1 = load i32, i32 addrspace(4)* %tmp
|
||||
store i32 %tmp1, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -63,10 +63,10 @@ entry:
|
||||
; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
|
||||
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
|
||||
; VIGFX9: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
|
||||
define amdgpu_kernel void @smrd4(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
|
||||
define amdgpu_kernel void @smrd4(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 {
|
||||
entry:
|
||||
%tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 262143
|
||||
%tmp1 = load i32, i32 addrspace(2)* %tmp
|
||||
%tmp = getelementptr i32, i32 addrspace(4)* %ptr, i64 262143
|
||||
%tmp1 = load i32, i32 addrspace(4)* %tmp
|
||||
store i32 %tmp1, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -77,10 +77,10 @@ entry:
|
||||
; SIVIGFX9: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
|
||||
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @smrd5(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
|
||||
define amdgpu_kernel void @smrd5(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 {
|
||||
entry:
|
||||
%tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 262144
|
||||
%tmp1 = load i32, i32 addrspace(2)* %tmp
|
||||
%tmp = getelementptr i32, i32 addrspace(4)* %ptr, i64 262144
|
||||
%tmp1 = load i32, i32 addrspace(4)* %tmp
|
||||
store i32 %tmp1, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -106,10 +106,10 @@ main_body:
|
||||
; GCN-LABEL: {{^}}smrd_load_const0:
|
||||
; SICI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
|
||||
; VIGFX9: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x10
|
||||
define amdgpu_ps void @smrd_load_const0(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
define amdgpu_ps void @smrd_load_const0(<4 x i32> addrspace(4)* inreg %arg, <4 x i32> addrspace(4)* inreg %arg1, <32 x i8> addrspace(4)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
main_body:
|
||||
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp
|
||||
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
|
||||
%tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp
|
||||
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16)
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
|
||||
ret void
|
||||
@ -120,10 +120,10 @@ main_body:
|
||||
; GCN-LABEL: {{^}}smrd_load_const1:
|
||||
; SICI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
|
||||
; VIGFX9: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
|
||||
define amdgpu_ps void @smrd_load_const1(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
define amdgpu_ps void @smrd_load_const1(<4 x i32> addrspace(4)* inreg %arg, <4 x i32> addrspace(4)* inreg %arg1, <32 x i8> addrspace(4)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
main_body:
|
||||
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp
|
||||
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
|
||||
%tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp
|
||||
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 1020)
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
|
||||
ret void
|
||||
@ -137,10 +137,10 @@ main_body:
|
||||
; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
|
||||
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
|
||||
; VIGFX9: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
|
||||
define amdgpu_ps void @smrd_load_const2(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
define amdgpu_ps void @smrd_load_const2(<4 x i32> addrspace(4)* inreg %arg, <4 x i32> addrspace(4)* inreg %arg1, <32 x i8> addrspace(4)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
main_body:
|
||||
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp
|
||||
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
|
||||
%tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp
|
||||
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 1024)
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
|
||||
ret void
|
||||
@ -152,10 +152,10 @@ main_body:
|
||||
; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
|
||||
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
|
||||
; VIGFX9: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
|
||||
define amdgpu_ps void @smrd_load_const3(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
define amdgpu_ps void @smrd_load_const3(<4 x i32> addrspace(4)* inreg %arg, <4 x i32> addrspace(4)* inreg %arg1, <32 x i8> addrspace(4)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
main_body:
|
||||
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp
|
||||
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
|
||||
%tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp
|
||||
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 1048572)
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
|
||||
ret void
|
||||
@ -167,10 +167,10 @@ main_body:
|
||||
; SIVIGFX9: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
|
||||
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_ps void @smrd_load_const4(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
define amdgpu_ps void @smrd_load_const4(<4 x i32> addrspace(4)* inreg %arg, <4 x i32> addrspace(4)* inreg %arg1, <32 x i8> addrspace(4)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
main_body:
|
||||
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp
|
||||
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
|
||||
%tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp
|
||||
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 1048576)
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
|
||||
ret void
|
||||
@ -257,9 +257,9 @@ main_body:
|
||||
|
||||
; GCN-LABEL: {{^}}smrd_sgpr_descriptor_promoted
|
||||
; GCN: v_readfirstlane
|
||||
define amdgpu_cs void @smrd_sgpr_descriptor_promoted([0 x i8] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), i32) #0 {
|
||||
define amdgpu_cs void @smrd_sgpr_descriptor_promoted([0 x i8] addrspace(4)* inreg noalias dereferenceable(18446744073709551615), i32) #0 {
|
||||
main_body:
|
||||
%descptr = bitcast [0 x i8] addrspace(2)* %0 to <4 x i32> addrspace(2)*, !amdgpu.uniform !0
|
||||
%descptr = bitcast [0 x i8] addrspace(4)* %0 to <4 x i32> addrspace(4)*, !amdgpu.uniform !0
|
||||
br label %.outer_loop_header
|
||||
|
||||
ret_block: ; preds = %.outer, %.label22, %main_body
|
||||
@ -275,7 +275,7 @@ ret_block: ; preds = %.outer, %.label22, %
|
||||
br i1 %inner_br1, label %.inner_loop_body, label %ret_block
|
||||
|
||||
.inner_loop_body:
|
||||
%descriptor = load <4 x i32>, <4 x i32> addrspace(2)* %descptr, align 16, !invariant.load !0
|
||||
%descriptor = load <4 x i32>, <4 x i32> addrspace(4)* %descptr, align 16, !invariant.load !0
|
||||
%load1result = call float @llvm.SI.load.const.v4i32(<4 x i32> %descriptor, i32 0)
|
||||
%inner_br2 = icmp uge i32 %1, 10
|
||||
br i1 %inner_br2, label %.inner_loop_header, label %.outer_loop_body
|
||||
|
@ -87,7 +87,7 @@ endif:
|
||||
; GCN-NOT: v_readlane_b32 m0
|
||||
; GCN-NOT: s_buffer_store_dword m0
|
||||
; GCN-NOT: s_buffer_load_dword m0
|
||||
define amdgpu_ps void @spill_kill_m0_lds(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %m0) #0 {
|
||||
define amdgpu_ps void @spill_kill_m0_lds(<16 x i8> addrspace(4)* inreg %arg, <16 x i8> addrspace(4)* inreg %arg1, <32 x i8> addrspace(4)* inreg %arg2, i32 inreg %m0) #0 {
|
||||
main_body:
|
||||
%tmp = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %m0)
|
||||
%cmp = fcmp ueq float 0.000000e+00, %tmp
|
||||
@ -191,7 +191,7 @@ endif:
|
||||
; TOSMEM: s_endpgm
|
||||
define amdgpu_kernel void @restore_m0_lds(i32 %arg) {
|
||||
%m0 = call i32 asm sideeffect "s_mov_b32 m0, 0", "={M0}"() #0
|
||||
%sval = load volatile i64, i64 addrspace(2)* undef
|
||||
%sval = load volatile i64, i64 addrspace(4)* undef
|
||||
%cmp = icmp eq i32 %arg, 0
|
||||
br i1 %cmp, label %ret, label %bb
|
||||
|
||||
|
@ -6,7 +6,7 @@
|
||||
|
||||
; GCN-LABEL: {{^}}split_smrd_add_worklist:
|
||||
; GCN: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
|
||||
define amdgpu_ps void @split_smrd_add_worklist([34 x <8 x i32>] addrspace(2)* byval %arg) #0 {
|
||||
define amdgpu_ps void @split_smrd_add_worklist([34 x <8 x i32>] addrspace(4)* byval %arg) #0 {
|
||||
bb:
|
||||
%tmp = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 96)
|
||||
%tmp1 = bitcast float %tmp to i32
|
||||
@ -19,8 +19,8 @@ bb3: ; preds = %bb
|
||||
%tmp4 = bitcast float %tmp to i32
|
||||
%tmp5 = add i32 %tmp4, 4
|
||||
%tmp6 = sext i32 %tmp5 to i64
|
||||
%tmp7 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %arg, i64 0, i64 %tmp6
|
||||
%tmp8 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp7, align 32, !tbaa !0
|
||||
%tmp7 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(4)* %arg, i64 0, i64 %tmp6
|
||||
%tmp8 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp7, align 32, !tbaa !0
|
||||
%tmp9 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float bitcast (i32 1061158912 to float), float bitcast (i32 1048576000 to float)>, <8 x i32> %tmp8, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp10 = extractelement <4 x float> %tmp9, i32 0
|
||||
%tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp10, float undef)
|
||||
|
@ -394,11 +394,11 @@ entry:
|
||||
|
||||
; SIVI: buffer_store_dwordx2
|
||||
; GFX9: global_store_dwordx2
|
||||
define amdgpu_kernel void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 {
|
||||
define amdgpu_kernel void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* nocapture %mem) #0 {
|
||||
entry:
|
||||
%0 = load i32, i32 addrspace(2)* %mem, align 4
|
||||
%arrayidx1.i = getelementptr inbounds i32, i32 addrspace(2)* %mem, i64 1
|
||||
%1 = load i32, i32 addrspace(2)* %arrayidx1.i, align 4
|
||||
%0 = load i32, i32 addrspace(4)* %mem, align 4
|
||||
%arrayidx1.i = getelementptr inbounds i32, i32 addrspace(4)* %mem, i64 1
|
||||
%1 = load i32, i32 addrspace(4)* %arrayidx1.i, align 4
|
||||
store i32 %0, i32 addrspace(1)* %out, align 4
|
||||
%arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
|
||||
store i32 %1, i32 addrspace(1)* %arrayidx1, align 4
|
||||
|
@ -689,11 +689,11 @@ entry:
|
||||
; XSI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dword
|
||||
; SI: buffer_store_dword
|
||||
define amdgpu_kernel void @vecload2(i32 addrspace(5)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 {
|
||||
define amdgpu_kernel void @vecload2(i32 addrspace(5)* nocapture %out, i32 addrspace(4)* nocapture %mem) #0 {
|
||||
entry:
|
||||
%0 = load i32, i32 addrspace(2)* %mem, align 4
|
||||
%arrayidx1.i = getelementptr inbounds i32, i32 addrspace(2)* %mem, i64 1
|
||||
%1 = load i32, i32 addrspace(2)* %arrayidx1.i, align 4
|
||||
%0 = load i32, i32 addrspace(4)* %mem, align 4
|
||||
%arrayidx1.i = getelementptr inbounds i32, i32 addrspace(4)* %mem, i64 1
|
||||
%1 = load i32, i32 addrspace(4)* %arrayidx1.i, align 4
|
||||
store i32 %0, i32 addrspace(5)* %out, align 4
|
||||
%arrayidx1 = getelementptr inbounds i32, i32 addrspace(5)* %out, i64 1
|
||||
store i32 %1, i32 addrspace(5)* %arrayidx1, align 4
|
||||
|
@ -27,9 +27,9 @@ define amdgpu_kernel void @v_test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i
|
||||
|
||||
; VI: s_sub_i32
|
||||
; VI: s_sub_i32
|
||||
define amdgpu_kernel void @s_test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %in0, <2 x i16> addrspace(2)* %in1) #1 {
|
||||
%a = load <2 x i16>, <2 x i16> addrspace(2)* %in0
|
||||
%b = load <2 x i16>, <2 x i16> addrspace(2)* %in1
|
||||
define amdgpu_kernel void @s_test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %in0, <2 x i16> addrspace(4)* %in1) #1 {
|
||||
%a = load <2 x i16>, <2 x i16> addrspace(4)* %in0
|
||||
%b = load <2 x i16>, <2 x i16> addrspace(4)* %in1
|
||||
%add = sub <2 x i16> %a, %b
|
||||
store <2 x i16> %add, <2 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
@ -38,8 +38,8 @@ define amdgpu_kernel void @s_test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i
|
||||
; GCN-LABEL: {{^}}s_test_sub_self_v2i16:
|
||||
; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]]
|
||||
; GCN: buffer_store_dword [[ZERO]]
|
||||
define amdgpu_kernel void @s_test_sub_self_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %in0) #1 {
|
||||
%a = load <2 x i16>, <2 x i16> addrspace(2)* %in0
|
||||
define amdgpu_kernel void @s_test_sub_self_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %in0) #1 {
|
||||
%a = load <2 x i16>, <2 x i16> addrspace(4)* %in0
|
||||
%add = sub <2 x i16> %a, %a
|
||||
store <2 x i16> %add, <2 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
declare i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #1
|
||||
declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #1
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
||||
@ -15,10 +15,10 @@ declare void @llvm.amdgcn.s.dcache.wb() #0
|
||||
; CHECK: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, [[OFFSETREG]]
|
||||
; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
|
||||
define amdgpu_kernel void @target_none() #0 {
|
||||
%kernargs = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
|
||||
%kernargs.gep = getelementptr inbounds i8, i8 addrspace(2)* %kernargs, i64 1024
|
||||
%kernargs.gep.cast = bitcast i8 addrspace(2)* %kernargs.gep to i32 addrspace(1)* addrspace(2)*
|
||||
%ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %kernargs.gep.cast
|
||||
%kernargs = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
||||
%kernargs.gep = getelementptr inbounds i8, i8 addrspace(4)* %kernargs, i64 1024
|
||||
%kernargs.gep.cast = bitcast i8 addrspace(4)* %kernargs.gep to i32 addrspace(1)* addrspace(4)*
|
||||
%ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %kernargs.gep.cast
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%id.ext = sext i32 %id to i64
|
||||
%gep = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i64 %id.ext
|
||||
@ -31,10 +31,10 @@ define amdgpu_kernel void @target_none() #0 {
|
||||
; CHECK: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, [[OFFSETREG]]
|
||||
; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
|
||||
define amdgpu_kernel void @target_tahiti() #1 {
|
||||
%kernargs = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
|
||||
%kernargs.gep = getelementptr inbounds i8, i8 addrspace(2)* %kernargs, i64 1024
|
||||
%kernargs.gep.cast = bitcast i8 addrspace(2)* %kernargs.gep to i32 addrspace(1)* addrspace(2)*
|
||||
%ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %kernargs.gep.cast
|
||||
%kernargs = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
||||
%kernargs.gep = getelementptr inbounds i8, i8 addrspace(4)* %kernargs, i64 1024
|
||||
%kernargs.gep.cast = bitcast i8 addrspace(4)* %kernargs.gep to i32 addrspace(1)* addrspace(4)*
|
||||
%ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %kernargs.gep.cast
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%id.ext = sext i32 %id to i64
|
||||
%gep = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i64 %id.ext
|
||||
@ -47,10 +47,10 @@ define amdgpu_kernel void @target_tahiti() #1 {
|
||||
; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
|
||||
; CHECK: s_dcache_inv_vol
|
||||
define amdgpu_kernel void @target_bonaire() #3 {
|
||||
%kernargs = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
|
||||
%kernargs.gep = getelementptr inbounds i8, i8 addrspace(2)* %kernargs, i64 1024
|
||||
%kernargs.gep.cast = bitcast i8 addrspace(2)* %kernargs.gep to i32 addrspace(1)* addrspace(2)*
|
||||
%ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %kernargs.gep.cast
|
||||
%kernargs = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
||||
%kernargs.gep = getelementptr inbounds i8, i8 addrspace(4)* %kernargs, i64 1024
|
||||
%kernargs.gep.cast = bitcast i8 addrspace(4)* %kernargs.gep to i32 addrspace(1)* addrspace(4)*
|
||||
%ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %kernargs.gep.cast
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%id.ext = sext i32 %id to i64
|
||||
%gep = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i64 %id.ext
|
||||
@ -64,10 +64,10 @@ define amdgpu_kernel void @target_bonaire() #3 {
|
||||
; CHECK: flat_store_dword
|
||||
; CHECK: s_dcache_wb{{$}}
|
||||
define amdgpu_kernel void @target_fiji() #4 {
|
||||
%kernargs = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
|
||||
%kernargs.gep = getelementptr inbounds i8, i8 addrspace(2)* %kernargs, i64 1024
|
||||
%kernargs.gep.cast = bitcast i8 addrspace(2)* %kernargs.gep to i32 addrspace(1)* addrspace(2)*
|
||||
%ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %kernargs.gep.cast
|
||||
%kernargs = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
||||
%kernargs.gep = getelementptr inbounds i8, i8 addrspace(4)* %kernargs, i64 1024
|
||||
%kernargs.gep.cast = bitcast i8 addrspace(4)* %kernargs.gep to i32 addrspace(1)* addrspace(4)*
|
||||
%ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %kernargs.gep.cast
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%id.ext = sext i32 %id to i64
|
||||
%gep = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i64 %id.ext
|
||||
|
@ -418,8 +418,8 @@ define amdgpu_kernel void @local_store_i64_align_4_with_split_offset(i64 addrspa
|
||||
; UNALIGNED: s_load_dword
|
||||
|
||||
; SI: buffer_store_dword
|
||||
define amdgpu_kernel void @constant_unaligned_load_i32(i32 addrspace(2)* %p, i32 addrspace(1)* %r) #0 {
|
||||
%v = load i32, i32 addrspace(2)* %p, align 1
|
||||
define amdgpu_kernel void @constant_unaligned_load_i32(i32 addrspace(4)* %p, i32 addrspace(1)* %r) #0 {
|
||||
%v = load i32, i32 addrspace(4)* %p, align 1
|
||||
store i32 %v, i32 addrspace(1)* %r, align 4
|
||||
ret void
|
||||
}
|
||||
@ -430,8 +430,8 @@ define amdgpu_kernel void @constant_unaligned_load_i32(i32 addrspace(2)* %p, i32
|
||||
|
||||
; UNALIGNED: s_load_dword
|
||||
; UNALIGNED: buffer_store_dword
|
||||
define amdgpu_kernel void @constant_align2_load_i32(i32 addrspace(2)* %p, i32 addrspace(1)* %r) #0 {
|
||||
%v = load i32, i32 addrspace(2)* %p, align 2
|
||||
define amdgpu_kernel void @constant_align2_load_i32(i32 addrspace(4)* %p, i32 addrspace(1)* %r) #0 {
|
||||
%v = load i32, i32 addrspace(4)* %p, align 2
|
||||
store i32 %v, i32 addrspace(1)* %r, align 4
|
||||
ret void
|
||||
}
|
||||
@ -444,8 +444,8 @@ define amdgpu_kernel void @constant_align2_load_i32(i32 addrspace(2)* %p, i32 ad
|
||||
|
||||
; UNALIGNED: s_load_dwordx2
|
||||
; UNALIGNED: buffer_store_dwordx2
|
||||
define amdgpu_kernel void @constant_align2_load_i64(i64 addrspace(2)* %p, i64 addrspace(1)* %r) #0 {
|
||||
%v = load i64, i64 addrspace(2)* %p, align 2
|
||||
define amdgpu_kernel void @constant_align2_load_i64(i64 addrspace(4)* %p, i64 addrspace(1)* %r) #0 {
|
||||
%v = load i64, i64 addrspace(4)* %p, align 2
|
||||
store i64 %v, i64 addrspace(1)* %r, align 4
|
||||
ret void
|
||||
}
|
||||
@ -453,8 +453,8 @@ define amdgpu_kernel void @constant_align2_load_i64(i64 addrspace(2)* %p, i64 ad
|
||||
; SI-LABEL: {{^}}constant_align4_load_i64:
|
||||
; SI: s_load_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
define amdgpu_kernel void @constant_align4_load_i64(i64 addrspace(2)* %p, i64 addrspace(1)* %r) #0 {
|
||||
%v = load i64, i64 addrspace(2)* %p, align 4
|
||||
define amdgpu_kernel void @constant_align4_load_i64(i64 addrspace(4)* %p, i64 addrspace(1)* %r) #0 {
|
||||
%v = load i64, i64 addrspace(4)* %p, align 4
|
||||
store i64 %v, i64 addrspace(1)* %r, align 4
|
||||
ret void
|
||||
}
|
||||
@ -462,8 +462,8 @@ define amdgpu_kernel void @constant_align4_load_i64(i64 addrspace(2)* %p, i64 ad
|
||||
; SI-LABEL: {{^}}constant_align4_load_v4i32:
|
||||
; SI: s_load_dwordx4
|
||||
; SI: buffer_store_dwordx4
|
||||
define amdgpu_kernel void @constant_align4_load_v4i32(<4 x i32> addrspace(2)* %p, <4 x i32> addrspace(1)* %r) #0 {
|
||||
%v = load <4 x i32>, <4 x i32> addrspace(2)* %p, align 4
|
||||
define amdgpu_kernel void @constant_align4_load_v4i32(<4 x i32> addrspace(4)* %p, <4 x i32> addrspace(1)* %r) #0 {
|
||||
%v = load <4 x i32>, <4 x i32> addrspace(4)* %p, align 4
|
||||
store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 4
|
||||
ret void
|
||||
}
|
||||
@ -482,8 +482,8 @@ define amdgpu_kernel void @constant_align4_load_v4i32(<4 x i32> addrspace(2)* %p
|
||||
; UNALIGNED: buffer_load_dwordx2
|
||||
|
||||
; SI: buffer_store_dwordx2
|
||||
define amdgpu_kernel void @constant_unaligned_load_v2i32(<2 x i32> addrspace(2)* %p, <2 x i32> addrspace(1)* %r) #0 {
|
||||
%v = load <2 x i32>, <2 x i32> addrspace(2)* %p, align 1
|
||||
define amdgpu_kernel void @constant_unaligned_load_v2i32(<2 x i32> addrspace(4)* %p, <2 x i32> addrspace(1)* %r) #0 {
|
||||
%v = load <2 x i32>, <2 x i32> addrspace(4)* %p, align 1
|
||||
store <2 x i32> %v, <2 x i32> addrspace(1)* %r, align 4
|
||||
ret void
|
||||
}
|
||||
@ -512,8 +512,8 @@ define amdgpu_kernel void @constant_unaligned_load_v2i32(<2 x i32> addrspace(2)*
|
||||
; UNALIGNED: buffer_load_dwordx4
|
||||
|
||||
; SI: buffer_store_dwordx4
|
||||
define amdgpu_kernel void @constant_unaligned_load_v4i32(<4 x i32> addrspace(2)* %p, <4 x i32> addrspace(1)* %r) #0 {
|
||||
%v = load <4 x i32>, <4 x i32> addrspace(2)* %p, align 1
|
||||
define amdgpu_kernel void @constant_unaligned_load_v4i32(<4 x i32> addrspace(4)* %p, <4 x i32> addrspace(1)* %r) #0 {
|
||||
%v = load <4 x i32>, <4 x i32> addrspace(4)* %p, align 1
|
||||
store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 4
|
||||
ret void
|
||||
}
|
||||
@ -521,8 +521,8 @@ define amdgpu_kernel void @constant_unaligned_load_v4i32(<4 x i32> addrspace(2)*
|
||||
; SI-LABEL: {{^}}constant_align4_load_i8:
|
||||
; SI: s_load_dword
|
||||
; SI: buffer_store_byte
|
||||
define amdgpu_kernel void @constant_align4_load_i8(i8 addrspace(2)* %p, i8 addrspace(1)* %r) #0 {
|
||||
%v = load i8, i8 addrspace(2)* %p, align 4
|
||||
define amdgpu_kernel void @constant_align4_load_i8(i8 addrspace(4)* %p, i8 addrspace(1)* %r) #0 {
|
||||
%v = load i8, i8 addrspace(4)* %p, align 4
|
||||
store i8 %v, i8 addrspace(1)* %r, align 4
|
||||
ret void
|
||||
}
|
||||
@ -530,8 +530,8 @@ define amdgpu_kernel void @constant_align4_load_i8(i8 addrspace(2)* %p, i8 addrs
|
||||
; SI-LABEL: {{^}}constant_align2_load_i8:
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_store_byte
|
||||
define amdgpu_kernel void @constant_align2_load_i8(i8 addrspace(2)* %p, i8 addrspace(1)* %r) #0 {
|
||||
%v = load i8, i8 addrspace(2)* %p, align 2
|
||||
define amdgpu_kernel void @constant_align2_load_i8(i8 addrspace(4)* %p, i8 addrspace(1)* %r) #0 {
|
||||
%v = load i8, i8 addrspace(4)* %p, align 2
|
||||
store i8 %v, i8 addrspace(1)* %r, align 2
|
||||
ret void
|
||||
}
|
||||
@ -541,10 +541,10 @@ define amdgpu_kernel void @constant_align2_load_i8(i8 addrspace(2)* %p, i8 addrs
|
||||
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[LO]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HI]]
|
||||
; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
|
||||
define amdgpu_kernel void @constant_align4_merge_load_2_i32(i32 addrspace(2)* %p, i32 addrspace(1)* %r) #0 {
|
||||
%gep0 = getelementptr i32, i32 addrspace(2)* %p, i64 1
|
||||
%v0 = load i32, i32 addrspace(2)* %p, align 4
|
||||
%v1 = load i32, i32 addrspace(2)* %gep0, align 4
|
||||
define amdgpu_kernel void @constant_align4_merge_load_2_i32(i32 addrspace(4)* %p, i32 addrspace(1)* %r) #0 {
|
||||
%gep0 = getelementptr i32, i32 addrspace(4)* %p, i64 1
|
||||
%v0 = load i32, i32 addrspace(4)* %p, align 4
|
||||
%v1 = load i32, i32 addrspace(4)* %gep0, align 4
|
||||
|
||||
%gep1 = getelementptr i32, i32 addrspace(1)* %r, i64 1
|
||||
store i32 %v0, i32 addrspace(1)* %r, align 4
|
||||
|
@ -35,7 +35,7 @@ bb2: ; preds = %bb
|
||||
br label %bb3
|
||||
|
||||
bb3: ; preds = %bb3, %bb2
|
||||
%val = load volatile i32, i32 addrspace(2)* undef
|
||||
%val = load volatile i32, i32 addrspace(4)* undef
|
||||
%tmp4 = icmp eq i32 %val, %arg1
|
||||
br i1 %tmp4, label %bb5, label %bb3
|
||||
|
||||
|
@ -36,11 +36,11 @@ define amdgpu_kernel void @vtx_fetch32_id3(i32 addrspace(1)* %out, i32 addrspace
|
||||
; EG: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0, #2 ; encoding: [0x40,0x02,0x0[[GPR]],0x10,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x08,0x00
|
||||
; CM: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0, #2 ; encoding: [0x40,0x02,0x0[[GPR]],0x00,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x00,0x00
|
||||
|
||||
@t = internal addrspace(2) constant [4 x i32] [i32 0, i32 1, i32 2, i32 3]
|
||||
@t = internal addrspace(4) constant [4 x i32] [i32 0, i32 1, i32 2, i32 3]
|
||||
|
||||
define amdgpu_kernel void @vtx_fetch32_id2(i32 addrspace(1)* %out, i32 %in) {
|
||||
%a = getelementptr inbounds [4 x i32], [4 x i32] addrspace(2)* @t, i32 0, i32 %in
|
||||
%v = load i32, i32 addrspace(2)* %a
|
||||
%a = getelementptr inbounds [4 x i32], [4 x i32] addrspace(4)* @t, i32 0, i32 %in
|
||||
%v = load i32, i32 addrspace(4)* %a
|
||||
store i32 %v, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
@ -27,15 +27,15 @@
|
||||
; GCN: NumVgprs: 256
|
||||
; GCN: ScratchSize: 1536
|
||||
|
||||
define amdgpu_vs void @main([9 x <4 x i32>] addrspace(2)* byval %arg, [17 x <4 x i32>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, [16 x <4 x i32>] addrspace(2)* byval %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {
|
||||
define amdgpu_vs void @main([9 x <4 x i32>] addrspace(4)* byval %arg, [17 x <4 x i32>] addrspace(4)* byval %arg1, [17 x <4 x i32>] addrspace(4)* byval %arg2, [34 x <8 x i32>] addrspace(4)* byval %arg3, [16 x <4 x i32>] addrspace(4)* byval %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {
|
||||
bb:
|
||||
%tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg1, i64 0, i64 0
|
||||
%tmp11 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, align 16, !tbaa !0
|
||||
%tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg1, i64 0, i64 0
|
||||
%tmp11 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, align 16, !tbaa !0
|
||||
%tmp12 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp11, i32 0)
|
||||
%tmp13 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp11, i32 16)
|
||||
%tmp14 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp11, i32 32)
|
||||
%tmp15 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %arg4, i64 0, i64 0
|
||||
%tmp16 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp15, align 16, !tbaa !0
|
||||
%tmp15 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(4)* %arg4, i64 0, i64 0
|
||||
%tmp16 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp15, align 16, !tbaa !0
|
||||
%tmp17 = add i32 %arg5, %arg7
|
||||
%tmp16.cast = bitcast <4 x i32> %tmp16 to <4 x i32>
|
||||
%tmp18 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp16.cast, i32 %tmp17, i32 0, i1 false, i1 false)
|
||||
|
@ -11,19 +11,19 @@
|
||||
; DEFAULT: exp
|
||||
; DEFAULT: s_waitcnt lgkmcnt(0)
|
||||
; DEFAULT: s_endpgm
|
||||
define amdgpu_vs void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, <16 x i8> addrspace(2)* inreg %arg3, <16 x i8> addrspace(2)* inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, float addrspace(2)* inreg %constptr) #0 {
|
||||
define amdgpu_vs void @main(<16 x i8> addrspace(4)* inreg %arg, <16 x i8> addrspace(4)* inreg %arg1, <32 x i8> addrspace(4)* inreg %arg2, <16 x i8> addrspace(4)* inreg %arg3, <16 x i8> addrspace(4)* inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, float addrspace(4)* inreg %constptr) #0 {
|
||||
main_body:
|
||||
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg3, i32 0
|
||||
%tmp10 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
|
||||
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(4)* %arg3, i32 0
|
||||
%tmp10 = load <16 x i8>, <16 x i8> addrspace(4)* %tmp, !tbaa !0
|
||||
%tmp10.cast = bitcast <16 x i8> %tmp10 to <4 x i32>
|
||||
%tmp11 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp10.cast, i32 %arg6, i32 0, i1 false, i1 false)
|
||||
%tmp12 = extractelement <4 x float> %tmp11, i32 0
|
||||
%tmp13 = extractelement <4 x float> %tmp11, i32 1
|
||||
call void @llvm.amdgcn.s.barrier() #1
|
||||
%tmp14 = extractelement <4 x float> %tmp11, i32 2
|
||||
%tmp15 = load float, float addrspace(2)* %constptr, align 4
|
||||
%tmp16 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg3, i32 1
|
||||
%tmp17 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp16, !tbaa !0
|
||||
%tmp15 = load float, float addrspace(4)* %constptr, align 4
|
||||
%tmp16 = getelementptr <16 x i8>, <16 x i8> addrspace(4)* %arg3, i32 1
|
||||
%tmp17 = load <16 x i8>, <16 x i8> addrspace(4)* %tmp16, !tbaa !0
|
||||
%tmp17.cast = bitcast <16 x i8> %tmp17 to <4 x i32>
|
||||
%tmp18 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp17.cast, i32 %arg6, i32 0, i1 false, i1 false)
|
||||
%tmp19 = extractelement <4 x float> %tmp18, i32 0
|
||||
@ -46,10 +46,10 @@ main_body:
|
||||
; ILPMAX: exp pos0
|
||||
; ILPMAX-NEXT: exp param0
|
||||
; ILPMAX: s_endpgm
|
||||
define amdgpu_vs void @main2([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, [16 x <16 x i8>] addrspace(2)* byval %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {
|
||||
define amdgpu_vs void @main2([6 x <16 x i8>] addrspace(4)* byval %arg, [17 x <16 x i8>] addrspace(4)* byval %arg1, [17 x <4 x i32>] addrspace(4)* byval %arg2, [34 x <8 x i32>] addrspace(4)* byval %arg3, [16 x <16 x i8>] addrspace(4)* byval %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {
|
||||
main_body:
|
||||
%tmp = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %arg4, i64 0, i64 0
|
||||
%tmp11 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, align 16, !tbaa !0
|
||||
%tmp = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(4)* %arg4, i64 0, i64 0
|
||||
%tmp11 = load <16 x i8>, <16 x i8> addrspace(4)* %tmp, align 16, !tbaa !0
|
||||
%tmp12 = add i32 %arg5, %arg7
|
||||
%tmp11.cast = bitcast <16 x i8> %tmp11 to <4 x i32>
|
||||
%tmp13 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp11.cast, i32 %tmp12, i32 0, i1 false, i1 false)
|
||||
@ -57,8 +57,8 @@ main_body:
|
||||
%tmp15 = extractelement <4 x float> %tmp13, i32 1
|
||||
%tmp16 = extractelement <4 x float> %tmp13, i32 2
|
||||
%tmp17 = extractelement <4 x float> %tmp13, i32 3
|
||||
%tmp18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %arg4, i64 0, i64 1
|
||||
%tmp19 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp18, align 16, !tbaa !0
|
||||
%tmp18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(4)* %arg4, i64 0, i64 1
|
||||
%tmp19 = load <16 x i8>, <16 x i8> addrspace(4)* %tmp18, align 16, !tbaa !0
|
||||
%tmp20 = add i32 %arg5, %arg7
|
||||
%tmp19.cast = bitcast <16 x i8> %tmp19 to <4 x i32>
|
||||
%tmp21 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp19.cast, i32 %tmp20, i32 0, i1 false, i1 false)
|
||||
|
@ -22,19 +22,19 @@ bb:
|
||||
br label %bb18
|
||||
|
||||
bb1: ; preds = %bb18
|
||||
%tmp = tail call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
|
||||
%tmp = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
|
||||
%tmp2 = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp3 = tail call i32 @llvm.amdgcn.workgroup.id.x()
|
||||
%tmp4 = getelementptr inbounds i8, i8 addrspace(2)* %tmp, i64 4
|
||||
%tmp5 = bitcast i8 addrspace(2)* %tmp4 to i16 addrspace(2)*
|
||||
%tmp6 = load i16, i16 addrspace(2)* %tmp5, align 4
|
||||
%tmp4 = getelementptr inbounds i8, i8 addrspace(4)* %tmp, i64 4
|
||||
%tmp5 = bitcast i8 addrspace(4)* %tmp4 to i16 addrspace(4)*
|
||||
%tmp6 = load i16, i16 addrspace(4)* %tmp5, align 4
|
||||
%tmp7 = zext i16 %tmp6 to i32
|
||||
%tmp8 = mul i32 %tmp3, %tmp7
|
||||
%tmp9 = add i32 %tmp8, %tmp2
|
||||
%tmp10 = tail call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%tmp10 = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
%tmp11 = zext i32 %tmp9 to i64
|
||||
%tmp12 = bitcast i8 addrspace(2)* %tmp10 to i64 addrspace(2)*
|
||||
%tmp13 = load i64, i64 addrspace(2)* %tmp12, align 8
|
||||
%tmp12 = bitcast i8 addrspace(4)* %tmp10 to i64 addrspace(4)*
|
||||
%tmp13 = load i64, i64 addrspace(4)* %tmp12, align 8
|
||||
%tmp14 = add i64 %tmp13, %tmp11
|
||||
%tmp15 = zext i1 %tmp99 to i32
|
||||
%tmp16 = and i64 %tmp14, 4294967295
|
||||
@ -131,7 +131,7 @@ bb18: ; preds = %bb18, %bb
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone speculatable
|
||||
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #1
|
||||
declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #1
|
||||
|
||||
; Function Attrs: nounwind readnone speculatable
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
@ -140,7 +140,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
declare i32 @llvm.amdgcn.workgroup.id.x() #1
|
||||
|
||||
; Function Attrs: nounwind readnone speculatable
|
||||
declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() #1
|
||||
declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #1
|
||||
|
||||
attributes #0 = { "target-cpu"="fiji" "target-features"="-flat-for-global" }
|
||||
attributes #1 = { nounwind readnone speculatable }
|
||||
|
@ -1,12 +1,12 @@
|
||||
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare < %s | FileCheck -check-prefix=OPT %s
|
||||
|
||||
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
|
||||
declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
|
||||
|
||||
; OPT-LABEL: @constant_load_i1
|
||||
; OPT: load i1
|
||||
; OPT-NEXT: store i1
|
||||
define amdgpu_kernel void @constant_load_i1(i1 addrspace(1)* %out, i1 addrspace(2)* %in) #0 {
|
||||
%val = load i1, i1 addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_load_i1(i1 addrspace(1)* %out, i1 addrspace(4)* %in) #0 {
|
||||
%val = load i1, i1 addrspace(4)* %in
|
||||
store i1 %val, i1 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -14,8 +14,8 @@ define amdgpu_kernel void @constant_load_i1(i1 addrspace(1)* %out, i1 addrspace(
|
||||
; OPT-LABEL: @constant_load_i1_align2
|
||||
; OPT: load i1
|
||||
; OPT-NEXT: store
|
||||
define amdgpu_kernel void @constant_load_i1_align2(i1 addrspace(1)* %out, i1 addrspace(2)* %in) #0 {
|
||||
%val = load i1, i1 addrspace(2)* %in, align 2
|
||||
define amdgpu_kernel void @constant_load_i1_align2(i1 addrspace(1)* %out, i1 addrspace(4)* %in) #0 {
|
||||
%val = load i1, i1 addrspace(4)* %in, align 2
|
||||
store i1 %val, i1 addrspace(1)* %out, align 2
|
||||
ret void
|
||||
}
|
||||
@ -25,8 +25,8 @@ define amdgpu_kernel void @constant_load_i1_align2(i1 addrspace(1)* %out, i1 add
|
||||
; OPT-NEXT: load i32
|
||||
; OPT-NEXT: trunc
|
||||
; OPT-NEXT: store
|
||||
define amdgpu_kernel void @constant_load_i1_align4(i1 addrspace(1)* %out, i1 addrspace(2)* %in) #0 {
|
||||
%val = load i1, i1 addrspace(2)* %in, align 4
|
||||
define amdgpu_kernel void @constant_load_i1_align4(i1 addrspace(1)* %out, i1 addrspace(4)* %in) #0 {
|
||||
%val = load i1, i1 addrspace(4)* %in, align 4
|
||||
store i1 %val, i1 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
@ -34,8 +34,8 @@ define amdgpu_kernel void @constant_load_i1_align4(i1 addrspace(1)* %out, i1 add
|
||||
; OPT-LABEL: @constant_load_i8
|
||||
; OPT: load i8
|
||||
; OPT-NEXT: store
|
||||
define amdgpu_kernel void @constant_load_i8(i8 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
|
||||
%val = load i8, i8 addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_load_i8(i8 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
|
||||
%val = load i8, i8 addrspace(4)* %in
|
||||
store i8 %val, i8 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -43,8 +43,8 @@ define amdgpu_kernel void @constant_load_i8(i8 addrspace(1)* %out, i8 addrspace(
|
||||
; OPT-LABEL: @constant_load_i8_align2
|
||||
; OPT: load i8
|
||||
; OPT-NEXT: store
|
||||
define amdgpu_kernel void @constant_load_i8_align2(i8 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
|
||||
%val = load i8, i8 addrspace(2)* %in, align 2
|
||||
define amdgpu_kernel void @constant_load_i8_align2(i8 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
|
||||
%val = load i8, i8 addrspace(4)* %in, align 2
|
||||
store i8 %val, i8 addrspace(1)* %out, align 2
|
||||
ret void
|
||||
}
|
||||
@ -54,8 +54,8 @@ define amdgpu_kernel void @constant_load_i8_align2(i8 addrspace(1)* %out, i8 add
|
||||
; OPT-NEXT: load i32
|
||||
; OPT-NEXT: trunc
|
||||
; OPT-NEXT: store
|
||||
define amdgpu_kernel void @constant_load_i8align4(i8 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
|
||||
%val = load i8, i8 addrspace(2)* %in, align 4
|
||||
define amdgpu_kernel void @constant_load_i8align4(i8 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
|
||||
%val = load i8, i8 addrspace(4)* %in, align 4
|
||||
store i8 %val, i8 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
@ -64,8 +64,8 @@ define amdgpu_kernel void @constant_load_i8align4(i8 addrspace(1)* %out, i8 addr
|
||||
; OPT-LABEL: @constant_load_v2i8
|
||||
; OPT: load <2 x i8>
|
||||
; OPT-NEXT: store
|
||||
define amdgpu_kernel void @constant_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
|
||||
%ld = load <2 x i8>, <2 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
|
||||
%ld = load <2 x i8>, <2 x i8> addrspace(4)* %in
|
||||
store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -76,32 +76,32 @@ define amdgpu_kernel void @constant_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x
|
||||
; OPT-NEXT: trunc
|
||||
; OPT-NEXT: bitcast
|
||||
; OPT-NEXT: store
|
||||
define amdgpu_kernel void @constant_load_v2i8_align4(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
|
||||
%ld = load <2 x i8>, <2 x i8> addrspace(2)* %in, align 4
|
||||
define amdgpu_kernel void @constant_load_v2i8_align4(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
|
||||
%ld = load <2 x i8>, <2 x i8> addrspace(4)* %in, align 4
|
||||
store <2 x i8> %ld, <2 x i8> addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; OPT-LABEL: @constant_load_v3i8
|
||||
; OPT: bitcast <3 x i8>
|
||||
; OPT-NEXT: load i32, i32 addrspace(2)
|
||||
; OPT-NEXT: load i32, i32 addrspace(4)
|
||||
; OPT-NEXT: trunc i32
|
||||
; OPT-NEXT: bitcast i24
|
||||
; OPT-NEXT: store <3 x i8>
|
||||
define amdgpu_kernel void @constant_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 {
|
||||
%ld = load <3 x i8>, <3 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
|
||||
%ld = load <3 x i8>, <3 x i8> addrspace(4)* %in
|
||||
store <3 x i8> %ld, <3 x i8> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; OPT-LABEL: @constant_load_v3i8_align4
|
||||
; OPT: bitcast <3 x i8>
|
||||
; OPT-NEXT: load i32, i32 addrspace(2)
|
||||
; OPT-NEXT: load i32, i32 addrspace(4)
|
||||
; OPT-NEXT: trunc i32
|
||||
; OPT-NEXT: bitcast i24
|
||||
; OPT-NEXT: store <3 x i8>
|
||||
define amdgpu_kernel void @constant_load_v3i8_align4(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 {
|
||||
%ld = load <3 x i8>, <3 x i8> addrspace(2)* %in, align 4
|
||||
define amdgpu_kernel void @constant_load_v3i8_align4(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
|
||||
%ld = load <3 x i8>, <3 x i8> addrspace(4)* %in, align 4
|
||||
store <3 x i8> %ld, <3 x i8> addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
@ -110,8 +110,8 @@ define amdgpu_kernel void @constant_load_v3i8_align4(<3 x i8> addrspace(1)* %out
|
||||
; OPT: load i16
|
||||
; OPT: sext
|
||||
; OPT-NEXT: store
|
||||
define amdgpu_kernel void @constant_load_i16(i32 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
|
||||
%ld = load i16, i16 addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_load_i16(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
|
||||
%ld = load i16, i16 addrspace(4)* %in
|
||||
%ext = sext i16 %ld to i32
|
||||
store i32 %ext, i32 addrspace(1)* %out
|
||||
ret void
|
||||
@ -123,8 +123,8 @@ define amdgpu_kernel void @constant_load_i16(i32 addrspace(1)* %out, i16 addrspa
|
||||
; OPT-NEXT: trunc
|
||||
; OPT-NEXT: sext
|
||||
; OPT-NEXT: store
|
||||
define amdgpu_kernel void @constant_load_i16_align4(i32 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
|
||||
%ld = load i16, i16 addrspace(2)* %in, align 4
|
||||
define amdgpu_kernel void @constant_load_i16_align4(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
|
||||
%ld = load i16, i16 addrspace(4)* %in, align 4
|
||||
%ext = sext i16 %ld to i32
|
||||
store i32 %ext, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
@ -133,8 +133,8 @@ define amdgpu_kernel void @constant_load_i16_align4(i32 addrspace(1)* %out, i16
|
||||
; OPT-LABEL: @constant_load_f16
|
||||
; OPT: load half
|
||||
; OPT-NEXT: store
|
||||
define amdgpu_kernel void @constant_load_f16(half addrspace(1)* %out, half addrspace(2)* %in) #0 {
|
||||
%ld = load half, half addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_load_f16(half addrspace(1)* %out, half addrspace(4)* %in) #0 {
|
||||
%ld = load half, half addrspace(4)* %in
|
||||
store half %ld, half addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -142,8 +142,8 @@ define amdgpu_kernel void @constant_load_f16(half addrspace(1)* %out, half addrs
|
||||
; OPT-LABEL: @constant_load_v2f16
|
||||
; OPT: load <2 x half>
|
||||
; OPT-NEXT: store
|
||||
define amdgpu_kernel void @constant_load_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(2)* %in) #0 {
|
||||
%ld = load <2 x half>, <2 x half> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_load_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(4)* %in) #0 {
|
||||
%ld = load <2 x half>, <2 x half> addrspace(4)* %in
|
||||
store <2 x half> %ld, <2 x half> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -151,8 +151,8 @@ define amdgpu_kernel void @constant_load_v2f16(<2 x half> addrspace(1)* %out, <2
|
||||
; OPT-LABEL: @load_volatile
|
||||
; OPT: load volatile i16
|
||||
; OPT-NEXT: store
|
||||
define amdgpu_kernel void @load_volatile(i16 addrspace(1)* %out, i16 addrspace(2)* %in) {
|
||||
%a = load volatile i16, i16 addrspace(2)* %in
|
||||
define amdgpu_kernel void @load_volatile(i16 addrspace(1)* %out, i16 addrspace(4)* %in) {
|
||||
%a = load volatile i16, i16 addrspace(4)* %in
|
||||
store i16 %a, i16 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -160,8 +160,8 @@ define amdgpu_kernel void @load_volatile(i16 addrspace(1)* %out, i16 addrspace(2
|
||||
; OPT-LABEL: @constant_load_v2i8_volatile
|
||||
; OPT: load volatile <2 x i8>
|
||||
; OPT-NEXT: store
|
||||
define amdgpu_kernel void @constant_load_v2i8_volatile(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
|
||||
%ld = load volatile <2 x i8>, <2 x i8> addrspace(2)* %in
|
||||
define amdgpu_kernel void @constant_load_v2i8_volatile(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
|
||||
%ld = load volatile <2 x i8>, <2 x i8> addrspace(4)* %in
|
||||
store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
@ -182,8 +182,8 @@ define amdgpu_kernel void @constant_load_v2i8_addrspace1(<2 x i8> addrspace(1)*
|
||||
; OPT-NEXT: zext
|
||||
; OPT-NEXT: store
|
||||
define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 {
|
||||
%dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
|
||||
%val = load i8, i8 addrspace(2)* %dispatch.ptr, align 4
|
||||
%dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
|
||||
%val = load i8, i8 addrspace(4)* %dispatch.ptr, align 4
|
||||
%ld = zext i8 %val to i32
|
||||
store i32 %ld, i32 addrspace(1)* %ptr
|
||||
ret void
|
||||
|
@ -2,64 +2,64 @@
|
||||
|
||||
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
|
||||
|
||||
@array = internal addrspace(2) constant [4096 x [32 x float]] zeroinitializer, align 4
|
||||
@array = internal addrspace(4) constant [4096 x [32 x float]] zeroinitializer, align 4
|
||||
|
||||
; IR-LABEL: @sum_of_array(
|
||||
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
|
||||
; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 1
|
||||
; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 32
|
||||
; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 33
|
||||
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [4096 x [32 x float]], [4096 x [32 x float]] addrspace(4)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
|
||||
; IR: getelementptr inbounds float, float addrspace(4)* [[BASE_PTR]], i64 1
|
||||
; IR: getelementptr inbounds float, float addrspace(4)* [[BASE_PTR]], i64 32
|
||||
; IR: getelementptr inbounds float, float addrspace(4)* [[BASE_PTR]], i64 33
|
||||
define amdgpu_kernel void @sum_of_array(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
|
||||
%tmp = sext i32 %y to i64
|
||||
%tmp1 = sext i32 %x to i64
|
||||
%tmp2 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp1, i64 %tmp
|
||||
%tmp4 = load float, float addrspace(2)* %tmp2, align 4
|
||||
%tmp2 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(4)* @array, i64 0, i64 %tmp1, i64 %tmp
|
||||
%tmp4 = load float, float addrspace(4)* %tmp2, align 4
|
||||
%tmp5 = fadd float %tmp4, 0.000000e+00
|
||||
%tmp6 = add i32 %y, 1
|
||||
%tmp7 = sext i32 %tmp6 to i64
|
||||
%tmp8 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp1, i64 %tmp7
|
||||
%tmp10 = load float, float addrspace(2)* %tmp8, align 4
|
||||
%tmp8 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(4)* @array, i64 0, i64 %tmp1, i64 %tmp7
|
||||
%tmp10 = load float, float addrspace(4)* %tmp8, align 4
|
||||
%tmp11 = fadd float %tmp5, %tmp10
|
||||
%tmp12 = add i32 %x, 1
|
||||
%tmp13 = sext i32 %tmp12 to i64
|
||||
%tmp14 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp13, i64 %tmp
|
||||
%tmp16 = load float, float addrspace(2)* %tmp14, align 4
|
||||
%tmp14 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(4)* @array, i64 0, i64 %tmp13, i64 %tmp
|
||||
%tmp16 = load float, float addrspace(4)* %tmp14, align 4
|
||||
%tmp17 = fadd float %tmp11, %tmp16
|
||||
%tmp18 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp13, i64 %tmp7
|
||||
%tmp20 = load float, float addrspace(2)* %tmp18, align 4
|
||||
%tmp18 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(4)* @array, i64 0, i64 %tmp13, i64 %tmp7
|
||||
%tmp20 = load float, float addrspace(4)* %tmp18, align 4
|
||||
%tmp21 = fadd float %tmp17, %tmp20
|
||||
store float %tmp21, float addrspace(1)* %output, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
@array2 = internal addrspace(2) constant [4096 x [4 x float]] zeroinitializer, align 4
|
||||
@array2 = internal addrspace(4) constant [4096 x [4 x float]] zeroinitializer, align 4
|
||||
|
||||
; Some of the indices go over the maximum mubuf offset, so don't split them.
|
||||
|
||||
; IR-LABEL: @sum_of_array_over_max_mubuf_offset(
|
||||
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
|
||||
; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 255
|
||||
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [4096 x [4 x float]], [4096 x [4 x float]] addrspace(4)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
|
||||
; IR: getelementptr inbounds float, float addrspace(4)* [[BASE_PTR]], i64 255
|
||||
; IR: add i32 %x, 256
|
||||
; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
|
||||
; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
|
||||
; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(4)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
|
||||
; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(4)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
|
||||
define amdgpu_kernel void @sum_of_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
|
||||
%tmp = sext i32 %y to i64
|
||||
%tmp1 = sext i32 %x to i64
|
||||
%tmp2 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp1, i64 %tmp
|
||||
%tmp4 = load float, float addrspace(2)* %tmp2, align 4
|
||||
%tmp2 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(4)* @array2, i64 0, i64 %tmp1, i64 %tmp
|
||||
%tmp4 = load float, float addrspace(4)* %tmp2, align 4
|
||||
%tmp5 = fadd float %tmp4, 0.000000e+00
|
||||
%tmp6 = add i32 %y, 255
|
||||
%tmp7 = sext i32 %tmp6 to i64
|
||||
%tmp8 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp1, i64 %tmp7
|
||||
%tmp10 = load float, float addrspace(2)* %tmp8, align 4
|
||||
%tmp8 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(4)* @array2, i64 0, i64 %tmp1, i64 %tmp7
|
||||
%tmp10 = load float, float addrspace(4)* %tmp8, align 4
|
||||
%tmp11 = fadd float %tmp5, %tmp10
|
||||
%tmp12 = add i32 %x, 256
|
||||
%tmp13 = sext i32 %tmp12 to i64
|
||||
%tmp14 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp13, i64 %tmp
|
||||
%tmp16 = load float, float addrspace(2)* %tmp14, align 4
|
||||
%tmp14 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(4)* @array2, i64 0, i64 %tmp13, i64 %tmp
|
||||
%tmp16 = load float, float addrspace(4)* %tmp14, align 4
|
||||
%tmp17 = fadd float %tmp11, %tmp16
|
||||
%tmp18 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp13, i64 %tmp7
|
||||
%tmp20 = load float, float addrspace(2)* %tmp18, align 4
|
||||
%tmp18 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(4)* @array2, i64 0, i64 %tmp13, i64 %tmp7
|
||||
%tmp20 = load float, float addrspace(4)* %tmp18, align 4
|
||||
%tmp21 = fadd float %tmp17, %tmp20
|
||||
store float %tmp21, float addrspace(1)* %output, align 4
|
||||
ret void
|
||||
@ -97,18 +97,18 @@ define amdgpu_kernel void @sum_of_lds_array_over_max_mubuf_offset(i32 %x, i32 %y
|
||||
; IR: getelementptr {{.*}} !amdgpu.uniform
|
||||
; IR: getelementptr {{.*}} !amdgpu.uniform
|
||||
; IR: getelementptr {{.*}} !amdgpu.uniform
|
||||
define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @keep_metadata([0 x <4 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <4 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #5 {
|
||||
define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @keep_metadata([0 x <4 x i32>] addrspace(4)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(4)* inreg noalias dereferenceable(18446744073709551615), [0 x <4 x i32>] addrspace(4)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(4)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #5 {
|
||||
main_body:
|
||||
%22 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #8
|
||||
%23 = bitcast float %22 to i32
|
||||
%24 = shl i32 %23, 1
|
||||
%25 = getelementptr [0 x <8 x i32>], [0 x <8 x i32>] addrspace(2)* %1, i32 0, i32 %24, !amdgpu.uniform !0
|
||||
%26 = load <8 x i32>, <8 x i32> addrspace(2)* %25, align 32, !invariant.load !0
|
||||
%25 = getelementptr [0 x <8 x i32>], [0 x <8 x i32>] addrspace(4)* %1, i32 0, i32 %24, !amdgpu.uniform !0
|
||||
%26 = load <8 x i32>, <8 x i32> addrspace(4)* %25, align 32, !invariant.load !0
|
||||
%27 = shl i32 %23, 2
|
||||
%28 = or i32 %27, 3
|
||||
%29 = bitcast [0 x <8 x i32>] addrspace(2)* %1 to [0 x <4 x i32>] addrspace(2)*
|
||||
%30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %29, i32 0, i32 %28, !amdgpu.uniform !0
|
||||
%31 = load <4 x i32>, <4 x i32> addrspace(2)* %30, align 16, !invariant.load !0
|
||||
%29 = bitcast [0 x <8 x i32>] addrspace(4)* %1 to [0 x <4 x i32>] addrspace(4)*
|
||||
%30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(4)* %29, i32 0, i32 %28, !amdgpu.uniform !0
|
||||
%31 = load <4 x i32>, <4 x i32> addrspace(4)* %30, align 16, !invariant.load !0
|
||||
%32 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> zeroinitializer, <8 x i32> %26, <4 x i32> %31, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #8
|
||||
%33 = extractelement <4 x float> %32, i32 0
|
||||
%34 = extractelement <4 x float> %32, i32 1
|
||||
|
Loading…
x
Reference in New Issue
Block a user