1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-18 10:32:48 +02:00

[AMDGPU] Fix missing lowering of LDS used in global scope.

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D103431
This commit is contained in:
hsmahesha 2021-06-10 08:00:23 +05:30
parent 34315a15e8
commit 5c332a4b0a
6 changed files with 294 additions and 33 deletions

View File

@ -171,7 +171,7 @@ private:
// Find variables to move into new struct instance
std::vector<GlobalVariable *> FoundLocalVars =
AMDGPU::findVariablesToLower(M, UsedList, F);
AMDGPU::findVariablesToLower(M, F);
if (FoundLocalVars.empty()) {
// No variables to rewrite, no changes made.

View File

@ -60,15 +60,34 @@ void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F) {
}
}
bool shouldLowerLDSToStruct(const SmallPtrSetImpl<GlobalValue *> &UsedList,
const GlobalVariable &GV, const Function *F) {
// Any LDS variable can be lowered by moving into the created struct
// Each variable so lowered is allocated in every kernel, so variables
// whose users are all known to be safe to lower without the transform
// are left unchanged.
bool hasUserInstruction(const GlobalValue *GV) {
SmallPtrSet<const User *, 8> Visited;
SmallVector<const User *, 16> Stack(GV->users());
while (!Stack.empty()) {
const User *U = Stack.pop_back_val();
if (!Visited.insert(U).second)
continue;
if (isa<Instruction>(U))
return true;
append_range(Stack, U->users());
}
return false;
}
bool shouldLowerLDSToStruct(const GlobalVariable &GV, const Function *F) {
// We are not interested in kernel LDS lowering for module LDS itself.
if (F && GV.getName() == "llvm.amdgcn.module.lds")
return false;
bool Ret = false;
SmallPtrSet<const User *, 8> Visited;
SmallVector<const User *, 16> Stack(GV.users());
SmallPtrSet<const GlobalValue *, 8> GlobalUsers;
assert(!F || isKernelCC(F));
@ -76,10 +95,16 @@ bool shouldLowerLDSToStruct(const SmallPtrSetImpl<GlobalValue *> &UsedList,
const User *V = Stack.pop_back_val();
Visited.insert(V);
if (auto *G = dyn_cast<GlobalValue>(V->stripPointerCasts())) {
if (UsedList.contains(G)) {
continue;
if (auto *G = dyn_cast<GlobalValue>(V)) {
StringRef GName = G->getName();
if (F && GName != "llvm.used" && GName != "llvm.compiler.used") {
// For kernel LDS lowering, if G is not a compiler.used list, then we
// cannot lower the lds GV since we cannot replace the use of GV within
// G.
return false;
}
GlobalUsers.insert(G);
continue;
}
if (auto *I = dyn_cast<Instruction>(V)) {
@ -88,32 +113,32 @@ bool shouldLowerLDSToStruct(const SmallPtrSetImpl<GlobalValue *> &UsedList,
// Used from this kernel, we want to put it into the structure.
Ret = true;
} else if (!F) {
// For module LDS lowering, lowering is required if the user instruction
// is from non-kernel function.
Ret |= !isKernelCC(UF);
}
continue;
}
if (auto *E = dyn_cast<ConstantExpr>(V)) {
for (const User *U : E->users()) {
if (Visited.insert(U).second) {
Stack.push_back(U);
}
}
continue;
}
// User V should be a constant, recursively visit users of V.
assert(isa<Constant>(V) && "Expected a constant.");
append_range(Stack, V->users());
}
// Unknown user, conservatively lower the variable.
// For module LDS conservatively means place it into the module LDS struct.
// For kernel LDS it means lower as a standalone variable.
return !F;
if (!F && !Ret) {
// For module LDS lowering, we have not yet decided whether GV should be
// lowered. Explore all global users of GV and check whether at least one of
// them appears as a use within an instruction (possibly a nested use via a
// constant expression); if so, conservatively lower the LDS.
for (auto *G : GlobalUsers)
Ret |= hasUserInstruction(G);
}
return Ret;
}
std::vector<GlobalVariable *>
findVariablesToLower(Module &M, const SmallPtrSetImpl<GlobalValue *> &UsedList,
const Function *F) {
std::vector<GlobalVariable *> findVariablesToLower(Module &M,
const Function *F) {
std::vector<llvm::GlobalVariable *> LocalVars;
for (auto &GV : M.globals()) {
if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
@ -137,7 +162,7 @@ findVariablesToLower(Module &M, const SmallPtrSetImpl<GlobalValue *> &UsedList,
// dropped by the back end if not. This pass skips over it.
continue;
}
if (!shouldLowerLDSToStruct(UsedList, GV, F)) {
if (!shouldLowerLDSToStruct(GV, F)) {
continue;
}
LocalVars.push_back(&GV);

View File

@ -25,19 +25,19 @@ bool isKernelCC(const Function *Func);
Align getAlign(DataLayout const &DL, const GlobalVariable *GV);
/// \returns true if a given global variable \p GV (or its global users) appears
/// as a use within some instruction (either from kernel or from non-kernel).
bool hasUserInstruction(const GlobalValue *GV);
/// \returns true if an LDS global requires lowering to a module LDS structure
/// if \p F is not given. If \p F is given it must be a kernel and function
/// \returns true if an LDS global is directly used from that kernel and it
/// is safe to replace its uses with a kernel LDS structure member.
/// \p UsedList contains a union of llvm.used and llvm.compiler.used variables
/// which do not count as a use.
bool shouldLowerLDSToStruct(const SmallPtrSetImpl<GlobalValue *> &UsedList,
const GlobalVariable &GV,
bool shouldLowerLDSToStruct(const GlobalVariable &GV,
const Function *F = nullptr);
std::vector<GlobalVariable *>
findVariablesToLower(Module &M, const SmallPtrSetImpl<GlobalValue *> &UsedList,
const Function *F = nullptr);
std::vector<GlobalVariable *> findVariablesToLower(Module &M,
const Function *F = nullptr);
SmallPtrSet<GlobalValue *, 32> getUsedList(Module &M);

View File

@ -0,0 +1,55 @@
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | FileCheck %s
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s
;.
; Kernel LDS lowering.
;.
; @lds.1: is part of @llvm.used list, and also it is used within kernel, hence it is lowered.
; @lds.2: is part of @llvm.compiler.used list, and also it is used within kernel, hence it is lowered.
; @lds.3: is used as initializer to @gptr.3, hence @lds.3 is not lowered, though it is used within kernel.
; @lds.4: is used as initializer to @gptr.4, hence @lds.4 is not lowered, though it is used within kernel,
; irrespective of the uses of @gptr.4 itself ( @gptr.4 is part of llvm.compiler.used list ).
; @lds.5: is part of @llvm.used list, but is not used within kernel, hence it is not lowered.
; @lds.6: is part of @llvm.compiler.used list, but is not used within kernel, hence it is not lowered.
;.
; CHECK: %llvm.amdgcn.kernel.k0.lds.t = type { i32, i16 }
; CHECK-NOT: @lds.1
; CHECK-NOT: @lds.2
; CHECK: @lds.3 = addrspace(3) global i64 undef, align 8
; CHECK: @lds.4 = addrspace(3) global float undef, align 4
; CHECK: @lds.5 = addrspace(3) global i16 undef, align 2
; CHECK: @lds.6 = addrspace(3) global i32 undef, align 4
@lds.1 = addrspace(3) global i16 undef, align 2
@lds.2 = addrspace(3) global i32 undef, align 4
@lds.3 = addrspace(3) global i64 undef, align 8
@lds.4 = addrspace(3) global float undef, align 4
@lds.5 = addrspace(3) global i16 undef, align 2
@lds.6 = addrspace(3) global i32 undef, align 4
; CHECK: @gptr.3 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* @lds.3 to i64*), align 8
; CHECK: @gptr.4 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast (float addrspace(3)* @lds.4 to i64 addrspace(3)*) to i64*), align 8
@gptr.3 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* @lds.3 to i64*), align 8
@gptr.4 = addrspace(1) global i64* addrspacecast (float addrspace(3)* @lds.4 to i64*), align 8
; CHECK: @llvm.amdgcn.kernel.k0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k0.lds.t undef, align 4
; CHECK: @llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i16 addrspace(3)* @lds.5 to i8 addrspace(3)*) to i8*)], section "llvm.metadata"
; CHECK: @llvm.compiler.used = appending global [2 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @lds.6 to i8 addrspace(3)*) to i8*)], section "llvm.metadata"
@llvm.used = appending global [2 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i16 addrspace(3)* @lds.1 to i8 addrspace(3)*) to i8*), i8* addrspacecast (i8 addrspace(3)* bitcast (i16 addrspace(3)* @lds.5 to i8 addrspace(3)*) to i8*)], section "llvm.metadata"
@llvm.compiler.used = appending global [3 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @lds.2 to i8 addrspace(3)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @lds.6 to i8 addrspace(3)*) to i8*)], section "llvm.metadata"
; Kernel @k0 loads once from each of @lds.1 through @lds.4. The expectations
; below require the @lds.1 and @lds.2 loads to be rewritten to members of
; @llvm.amdgcn.kernel.k0.lds, while the @lds.3 and @lds.4 loads keep addressing
; the original globals.
; CHECK-LABEL: @k0()
; CHECK: %ld.lds.1 = load i16, i16 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, %llvm.amdgcn.kernel.k0.lds.t addrspace(3)* @llvm.amdgcn.kernel.k0.lds, i32 0, i32 1), align 2
; CHECK: %ld.lds.2 = load i32, i32 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, %llvm.amdgcn.kernel.k0.lds.t addrspace(3)* @llvm.amdgcn.kernel.k0.lds, i32 0, i32 0), align 4
; CHECK: %ld.lds.3 = load i64, i64 addrspace(3)* @lds.3, align 4
; CHECK: %ld.lds.4 = load float, float addrspace(3)* @lds.4, align 4
; CHECK: ret void
define amdgpu_kernel void @k0() {
%ld.lds.1 = load i16, i16 addrspace(3)* @lds.1
%ld.lds.2 = load i32, i32 addrspace(3)* @lds.2
%ld.lds.3 = load i64, i64 addrspace(3)* @lds.3
%ld.lds.4 = load float, float addrspace(3)* @lds.4
ret void
}

View File

@ -0,0 +1,93 @@
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | FileCheck %s
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s
;.
; @lds.1: is aliased with @alias.to.lds.1, and @alias.to.lds.1 is used within kernel @k0.
; Hence, @lds.1 is lowered.
; @lds.2: is aliased with @alias.to.lds.2, and @alias.to.lds.2 is used within non-kernel @f0,
; Hence, @lds.2 is lowered.
; @lds.3: is used as initializer to global @gptr.3, and @gptr.3 is aliased with @alias.to.gptr.3,
; and @alias.to.gptr.3 is used within kernel @k1. Hence, @lds.3 is lowered.
; @lds.4: is used as initializer to global @gptr.4, and @gptr.4 is aliased with @alias.to.gptr.4,
; and @alias.to.gptr.4 is used within non-kernel @f1. Hence, @lds.4 is lowered.
; @lds.5: is aliased with @alias.to.lds.5, but neither @lds.5 nor @alias.to.lds.5 is used anywhere.
; Hence, @lds.5 is not lowered.
; @lds.6: is used as initializer to global @gptr.6, and @gptr.6 is aliased with @alias.to.gptr.6.
; But none of them are used anywhere. Hence, @lds.6 is not lowered.
;.
; CHECK: %llvm.amdgcn.module.lds.t = type { [4 x i8], [3 x i8], [1 x i8], [2 x i8], [1 x i8] }
; CHECK-NOT: @lds.1
; CHECK-NOT: @lds.2
; CHECK-NOT: @lds.3
; CHECK-NOT: @lds.4
; CHECK: @lds.5 = internal unnamed_addr addrspace(3) global [5 x i8] undef, align 8
; CHECK: @lds.6 = internal unnamed_addr addrspace(3) global [6 x i8] undef, align 8
@lds.1 = internal unnamed_addr addrspace(3) global [1 x i8] undef, align 1
@lds.2 = internal unnamed_addr addrspace(3) global [2 x i8] undef, align 2
@lds.3 = internal unnamed_addr addrspace(3) global [3 x i8] undef, align 4
@lds.4 = internal unnamed_addr addrspace(3) global [4 x i8] undef, align 4
@lds.5 = internal unnamed_addr addrspace(3) global [5 x i8] undef, align 8
@lds.6 = internal unnamed_addr addrspace(3) global [6 x i8] undef, align 8
; CHECK: @gptr.3 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast ([3 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 1) to i64 addrspace(3)*) to i64*), align 8
; CHECK: @gptr.4 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast (%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds to i64 addrspace(3)*) to i64*), align 8
; CHECK: @gptr.6 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast ([6 x i8] addrspace(3)* @lds.6 to i64 addrspace(3)*) to i64*), align 8
@gptr.3 = addrspace(1) global i64* addrspacecast ([3 x i8] addrspace(3)* @lds.3 to i64*), align 8
@gptr.4 = addrspace(1) global i64* addrspacecast ([4 x i8] addrspace(3)* @lds.4 to i64*), align 8
@gptr.6 = addrspace(1) global i64* addrspacecast ([6 x i8] addrspace(3)* @lds.6 to i64*), align 8
; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 4
; CHECK: @llvm.compiler.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 0, i32 0) to i8*)], section "llvm.metadata"
; CHECK: @alias.to.lds.1 = alias [1 x i8], getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 4)
; CHECK: @alias.to.lds.2 = alias [2 x i8], getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 3)
; CHECK: @alias.to.gptr.3 = alias i64*, i64* addrspace(1)* @gptr.3
; CHECK: @alias.to.gptr.4 = alias i64*, i64* addrspace(1)* @gptr.4
; CHECK: @alias.to.lds.5 = alias [5 x i8], [5 x i8] addrspace(3)* @lds.5
; CHECK: @alias.to.gptr.6 = alias i64*, i64* addrspace(1)* @gptr.6
@alias.to.lds.1 = alias [1 x i8], [1 x i8] addrspace(3)* @lds.1
@alias.to.lds.2 = alias [2 x i8], [2 x i8] addrspace(3)* @lds.2
@alias.to.gptr.3 = alias i64*, i64* addrspace(1)* @gptr.3
@alias.to.gptr.4 = alias i64*, i64* addrspace(1)* @gptr.4
@alias.to.lds.5 = alias [5 x i8], [5 x i8] addrspace(3)* @lds.5
@alias.to.gptr.6 = alias i64*, i64* addrspace(1)* @gptr.6
; Non-kernel function that loads through @alias.to.gptr.4. The load is expected
; to still reference the alias, with an explicit alignment printed.
; CHECK-LABEL: @f1
; CHECK: %ld = load i64*, i64* addrspace(1)* @alias.to.gptr.4, align 8
; CHECK: ret void
define void @f1() {
%ld = load i64*, i64* addrspace(1)* @alias.to.gptr.4
ret void
}
; Non-kernel function that stores one byte through @alias.to.lds.2. The body is
; expected to come out unchanged, still referencing the alias.
; CHECK-LABEL: @f0
; CHECK: %bc = bitcast [2 x i8] addrspace(3)* @alias.to.lds.2 to i8 addrspace(3)*
; CHECK: store i8 1, i8 addrspace(3)* %bc, align 2
; CHECK: ret void
define void @f0() {
%bc = bitcast [2 x i8] addrspace(3)* @alias.to.lds.2 to i8 addrspace(3)*
store i8 1, i8 addrspace(3)* %bc, align 2
ret void
}
; Kernel that loads through @alias.to.gptr.3. The kernel is expected to gain a
; call to @llvm.donothing carrying an "ExplicitUse" bundle on the module LDS
; struct, followed by the original load. Only the function name is matched with
; a label directive; the body lines use plain match directives because they are
; not unique labels in the output.
; CHECK-LABEL: @k1
; CHECK: call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds) ]
; CHECK: %ld = load i64*, i64* addrspace(1)* @alias.to.gptr.3, align 8
; CHECK: ret void
define amdgpu_kernel void @k1() {
%ld = load i64*, i64* addrspace(1)* @alias.to.gptr.3
ret void
}
; Kernel that stores one byte through @alias.to.lds.1. The kernel is expected to
; gain a call to @llvm.donothing carrying an "ExplicitUse" bundle on the module
; LDS struct, followed by the original bitcast and store. Only the function name
; is matched with a label directive; the body lines use plain match directives
; because they are not unique labels in the output.
; CHECK-LABEL: @k0
; CHECK: call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds) ]
; CHECK: %bc = bitcast [1 x i8] addrspace(3)* @alias.to.lds.1 to i8 addrspace(3)*
; CHECK: store i8 1, i8 addrspace(3)* %bc, align 1
; CHECK: ret void
define amdgpu_kernel void @k0() {
%bc = bitcast [1 x i8] addrspace(3)* @alias.to.lds.1 to i8 addrspace(3)*
store i8 1, i8 addrspace(3)* %bc, align 1
ret void
}

View File

@ -0,0 +1,88 @@
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | FileCheck %s
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s
;.
; @lds.1: is part of @llvm.used list, and is not used anywhere else. Hence it is not lowered.
; @lds.2: is part of @llvm.compiler.used list, and is not used anywhere else. Hence it is not lowered.
; @lds.3: is used as initializer to @gptr.3, and is not used anywhere else. @gptr.3 itself is also not
; used anywhere else, hence @lds.3 is not lowered.
; @lds.4: is used as initializer to @gptr.4, and is not used anywhere else. @gptr.4 is part of
; @llvm.compiler.used list, but is not used anywhere else. Hence @lds.4 is not lowered.
;
; @lds.5: is used as initializer to @gptr.5, and is not used anywhere else. @gptr.5 is part of
; @llvm.compiler.used list, but is also used within kernel @k0. Hence @lds.5 is lowered.
; @lds.6: is used as initializer to @gptr.6, and is not used anywhere else. @gptr.6 is part of
; @llvm.compiler.used list, but is also used within non-kernel function @f0. Hence @lds.6 is lowered.
; @lds.7: is used as initializer to @gptr.7, and is not used anywhere else. @gptr.7 is used as initializer to @gptr.8,
; and @gptr.8 is used within non-kernel function @f1. Hence @lds.7 is lowered.
;.
; CHECK: %llvm.amdgcn.module.lds.t = type { [3 x float], [4 x i8], [2 x float], [1 x float] }
; CHECK: @lds.1 = addrspace(3) global i16 undef, align 2
; CHECK: @lds.2 = addrspace(3) global i32 undef, align 4
; CHECK: @lds.3 = addrspace(3) global i64 undef, align 8
; CHECK: @lds.4 = addrspace(3) global float undef, align 4
; CHECK-NOT: @lds.5
; CHECK-NOT: @lds.6
; CHECK-NOT: @lds.7
@lds.1 = addrspace(3) global i16 undef, align 2
@lds.2 = addrspace(3) global i32 undef, align 4
@lds.3 = addrspace(3) global i64 undef, align 8
@lds.4 = addrspace(3) global float undef, align 4
@lds.5 = addrspace(3) global [1 x float] undef, align 4
@lds.6 = addrspace(3) global [2 x float] undef, align 8
@lds.7 = addrspace(3) global [3 x float] undef, align 16
; CHECK: @gptr.3 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* @lds.3 to i64*), align 8
; CHECK: @gptr.4 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast (float addrspace(3)* @lds.4 to i64 addrspace(3)*) to i64*), align 8
; CHECK: @gptr.5 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast ([1 x float] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 3) to i64 addrspace(3)*) to i64*), align 8
; CHECK: @gptr.6 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast ([2 x float] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 2) to i64 addrspace(3)*) to i64*), align 8
; CHECK: @gptr.7 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast (%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds to i64 addrspace(3)*) to i64*), align 8
; CHECK: @gptr.8 = addrspace(1) global i64** addrspacecast (i64* addrspace(1)* @gptr.7 to i64**), align 8
@gptr.3 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* @lds.3 to i64*), align 8
@gptr.4 = addrspace(1) global i64* addrspacecast (float addrspace(3)* @lds.4 to i64*), align 8
@gptr.5 = addrspace(1) global i64* addrspacecast ([1 x float] addrspace(3)* @lds.5 to i64*), align 8
@gptr.6 = addrspace(1) global i64* addrspacecast ([2 x float] addrspace(3)* @lds.6 to i64*), align 8
@gptr.7 = addrspace(1) global i64* addrspacecast ([3 x float] addrspace(3)* @lds.7 to i64*), align 8
@gptr.8 = addrspace(1) global i64** addrspacecast (i64* addrspace(1)* @gptr.7 to i64**), align 8
; CHECK: @llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i16 addrspace(3)* @lds.1 to i8 addrspace(3)*) to i8*)], section "llvm.metadata"
; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 16
; CHECK: @llvm.compiler.used = appending global [5 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @lds.2 to i8 addrspace(3)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(3)* bitcast (%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds to i8 addrspace(3)*) to i8*)], section "llvm.metadata"
@llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i16 addrspace(3)* @lds.1 to i8 addrspace(3)*) to i8*)], section "llvm.metadata"
@llvm.compiler.used = appending global [4 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @lds.2 to i8 addrspace(3)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
; Non-kernel function that loads from @gptr.8. The load is expected to still
; reference @gptr.8, with an explicit alignment printed.
; CHECK-LABEL: @f1()
; CHECK: %ld = load i64**, i64** addrspace(1)* @gptr.8, align 8
; CHECK: ret void
define void @f1() {
%ld = load i64**, i64** addrspace(1)* @gptr.8
ret void
}
; Non-kernel function that loads an i32 through a constant expression which adds
; the address of @gptr.6 to itself. The load is expected to come out with the
; same constant expression, still referencing @gptr.6.
; CHECK-LABEL: @f0()
; CHECK: %ld = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i32 addrspace(1)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32
; CHECK: addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i32 addrspace(1)*) to i32*) to i64)) to i32*), align 4
; CHECK: ret void
define void @f0() {
%ld = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i32 addrspace(1)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i32 addrspace(1)*) to i32*) to i64)) to i32*), align 4
ret void
}
; Kernel that loads an i32 through a constant expression which adds the address
; of @gptr.5 to itself. The kernel is expected to gain a call to @llvm.donothing
; carrying an "ExplicitUse" bundle on the module LDS struct, followed by the
; load with the same constant expression.
; CHECK-LABEL: @k0()
; CHECK: call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds) ]
; CHECK: %ld = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i32 addrspace(1)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32
; CHECK: addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i32 addrspace(1)*) to i32*) to i64)) to i32*), align 4
; CHECK: ret void
define amdgpu_kernel void @k0() {
%ld = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i32 addrspace(1)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i32 addrspace(1)*) to i32*) to i64)) to i32*), align 4
ret void
}
; Kernel with an empty body. It is still expected to gain the call to
; @llvm.donothing carrying an "ExplicitUse" bundle on the module LDS struct.
; CHECK-LABEL: @k1()
; CHECK: call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds) ]
; CHECK: ret void
define amdgpu_kernel void @k1() {
ret void
}