mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-21 18:22:53 +01:00
[AMDGPU] Fix missing lowering of LDS used in global scope.
Reviewed By: rampitec Differential Revision: https://reviews.llvm.org/D103431
This commit is contained in:
parent
34315a15e8
commit
5c332a4b0a
@ -171,7 +171,7 @@ private:
|
||||
|
||||
// Find variables to move into new struct instance
|
||||
std::vector<GlobalVariable *> FoundLocalVars =
|
||||
AMDGPU::findVariablesToLower(M, UsedList, F);
|
||||
AMDGPU::findVariablesToLower(M, F);
|
||||
|
||||
if (FoundLocalVars.empty()) {
|
||||
// No variables to rewrite, no changes made.
|
||||
|
@ -60,15 +60,34 @@ void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F) {
|
||||
}
|
||||
}
|
||||
|
||||
bool shouldLowerLDSToStruct(const SmallPtrSetImpl<GlobalValue *> &UsedList,
|
||||
const GlobalVariable &GV, const Function *F) {
|
||||
// Any LDS variable can be lowered by moving into the created struct
|
||||
// Each variable so lowered is allocated in every kernel, so variables
|
||||
// whose users are all known to be safe to lower without the transform
|
||||
// are left unchanged.
|
||||
bool hasUserInstruction(const GlobalValue *GV) {
|
||||
SmallPtrSet<const User *, 8> Visited;
|
||||
SmallVector<const User *, 16> Stack(GV->users());
|
||||
|
||||
while (!Stack.empty()) {
|
||||
const User *U = Stack.pop_back_val();
|
||||
|
||||
if (!Visited.insert(U).second)
|
||||
continue;
|
||||
|
||||
if (isa<Instruction>(U))
|
||||
return true;
|
||||
|
||||
append_range(Stack, U->users());
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool shouldLowerLDSToStruct(const GlobalVariable &GV, const Function *F) {
|
||||
// We are not interested in kernel LDS lowering for module LDS itself.
|
||||
if (F && GV.getName() == "llvm.amdgcn.module.lds")
|
||||
return false;
|
||||
|
||||
bool Ret = false;
|
||||
SmallPtrSet<const User *, 8> Visited;
|
||||
SmallVector<const User *, 16> Stack(GV.users());
|
||||
SmallPtrSet<const GlobalValue *, 8> GlobalUsers;
|
||||
|
||||
assert(!F || isKernelCC(F));
|
||||
|
||||
@ -76,10 +95,16 @@ bool shouldLowerLDSToStruct(const SmallPtrSetImpl<GlobalValue *> &UsedList,
|
||||
const User *V = Stack.pop_back_val();
|
||||
Visited.insert(V);
|
||||
|
||||
if (auto *G = dyn_cast<GlobalValue>(V->stripPointerCasts())) {
|
||||
if (UsedList.contains(G)) {
|
||||
continue;
|
||||
if (auto *G = dyn_cast<GlobalValue>(V)) {
|
||||
StringRef GName = G->getName();
|
||||
if (F && GName != "llvm.used" && GName != "llvm.compiler.used") {
|
||||
// For kernel LDS lowering, if G is not a compiler.used list, then we
|
||||
// cannot lower the lds GV since we cannot replace the use of GV within
|
||||
// G.
|
||||
return false;
|
||||
}
|
||||
GlobalUsers.insert(G);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (auto *I = dyn_cast<Instruction>(V)) {
|
||||
@ -88,32 +113,32 @@ bool shouldLowerLDSToStruct(const SmallPtrSetImpl<GlobalValue *> &UsedList,
|
||||
// Used from this kernel, we want to put it into the structure.
|
||||
Ret = true;
|
||||
} else if (!F) {
|
||||
// For module LDS lowering, lowering is required if the user instruction
|
||||
// is from non-kernel function.
|
||||
Ret |= !isKernelCC(UF);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (auto *E = dyn_cast<ConstantExpr>(V)) {
|
||||
for (const User *U : E->users()) {
|
||||
if (Visited.insert(U).second) {
|
||||
Stack.push_back(U);
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// User V should be a constant, recursively visit users of V.
|
||||
assert(isa<Constant>(V) && "Expected a constant.");
|
||||
append_range(Stack, V->users());
|
||||
}
|
||||
|
||||
// Unknown user, conservatively lower the variable.
|
||||
// For module LDS conservatively means place it into the module LDS struct.
|
||||
// For kernel LDS it means lower as a standalone variable.
|
||||
return !F;
|
||||
if (!F && !Ret) {
|
||||
// For module LDS lowering, we have not yet decided if we should lower GV or
|
||||
// not. Explore all global users of GV, and check if at least one of these
|
||||
// global users appears as a use within an instruction (possibly a nested use
|
||||
// via constant expression), if so, then conservatively lower LDS.
|
||||
for (auto *G : GlobalUsers)
|
||||
Ret |= hasUserInstruction(G);
|
||||
}
|
||||
|
||||
return Ret;
|
||||
}
|
||||
|
||||
std::vector<GlobalVariable *>
|
||||
findVariablesToLower(Module &M, const SmallPtrSetImpl<GlobalValue *> &UsedList,
|
||||
const Function *F) {
|
||||
std::vector<GlobalVariable *> findVariablesToLower(Module &M,
|
||||
const Function *F) {
|
||||
std::vector<llvm::GlobalVariable *> LocalVars;
|
||||
for (auto &GV : M.globals()) {
|
||||
if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
|
||||
@ -137,7 +162,7 @@ findVariablesToLower(Module &M, const SmallPtrSetImpl<GlobalValue *> &UsedList,
|
||||
// dropped by the back end if not. This pass skips over it.
|
||||
continue;
|
||||
}
|
||||
if (!shouldLowerLDSToStruct(UsedList, GV, F)) {
|
||||
if (!shouldLowerLDSToStruct(GV, F)) {
|
||||
continue;
|
||||
}
|
||||
LocalVars.push_back(&GV);
|
||||
|
@ -25,19 +25,19 @@ bool isKernelCC(const Function *Func);
|
||||
|
||||
Align getAlign(DataLayout const &DL, const GlobalVariable *GV);
|
||||
|
||||
/// \returns true if a given global variable \p GV (or its global users) appears
|
||||
/// as a use within some instruction (either from kernel or from non-kernel).
|
||||
bool hasUserInstruction(const GlobalValue *GV);
|
||||
|
||||
/// \returns true if an LDS global requires lowering to a module LDS structure
|
||||
/// if \p F is not given. If \p F is given it must be a kernel and function
|
||||
/// \returns true if an LDS global is directly used from that kernel and it
|
||||
/// is safe to replace its uses with a kernel LDS structure member.
|
||||
/// \p UsedList contains a union of llvm.used and llvm.compiler.used variables
|
||||
/// which do not count as a use.
|
||||
bool shouldLowerLDSToStruct(const SmallPtrSetImpl<GlobalValue *> &UsedList,
|
||||
const GlobalVariable &GV,
|
||||
bool shouldLowerLDSToStruct(const GlobalVariable &GV,
|
||||
const Function *F = nullptr);
|
||||
|
||||
std::vector<GlobalVariable *>
|
||||
findVariablesToLower(Module &M, const SmallPtrSetImpl<GlobalValue *> &UsedList,
|
||||
const Function *F = nullptr);
|
||||
std::vector<GlobalVariable *> findVariablesToLower(Module &M,
|
||||
const Function *F = nullptr);
|
||||
|
||||
SmallPtrSet<GlobalValue *, 32> getUsedList(Module &M);
|
||||
|
||||
|
55
test/CodeGen/AMDGPU/lower-kernel-lds-global-uses.ll
Normal file
55
test/CodeGen/AMDGPU/lower-kernel-lds-global-uses.ll
Normal file
@ -0,0 +1,55 @@
|
||||
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | FileCheck %s
|
||||
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s
|
||||
|
||||
;.
|
||||
; Kernel LDS lowering.
|
||||
;.
|
||||
; @lds.1: is part of @llvm.used list, and also it is used within kernel, hence it is lowered.
|
||||
; @lds.2: is part of @llvm.compiler.used list, and also it is used within kernel, hence it is lowered.
|
||||
; @lds.3: is used as initializer to @gptr.3, hence @lds.3 is not lowered, though it is used within kernel.
|
||||
; @lds.4: is used as initializer to @gptr.4, hence @lds.4 is not lowered, though it is used within kernel,
|
||||
; irrespective of the uses of @gptr.4 itself ( @gptr.4 is part of llvm.compiler.used list ).
|
||||
; @lds.5: is part of @llvm.used list, but is not used within kernel, hence it is not lowered.
|
||||
; @lds.6: is part of @llvm.compiler.used list, but is not used within kernel, hence it is not lowered.
|
||||
;.
|
||||
|
||||
; CHECK: %llvm.amdgcn.kernel.k0.lds.t = type { i32, i16 }
|
||||
|
||||
; CHECK-NOT: @lds.1
|
||||
; CHECK-NOT: @lds.2
|
||||
; CHECK: @lds.3 = addrspace(3) global i64 undef, align 8
|
||||
; CHECK: @lds.4 = addrspace(3) global float undef, align 4
|
||||
; CHECK: @lds.5 = addrspace(3) global i16 undef, align 2
|
||||
; CHECK: @lds.6 = addrspace(3) global i32 undef, align 4
|
||||
@lds.1 = addrspace(3) global i16 undef, align 2
|
||||
@lds.2 = addrspace(3) global i32 undef, align 4
|
||||
@lds.3 = addrspace(3) global i64 undef, align 8
|
||||
@lds.4 = addrspace(3) global float undef, align 4
|
||||
@lds.5 = addrspace(3) global i16 undef, align 2
|
||||
@lds.6 = addrspace(3) global i32 undef, align 4
|
||||
|
||||
; CHECK: @gptr.3 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* @lds.3 to i64*), align 8
|
||||
; CHECK: @gptr.4 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast (float addrspace(3)* @lds.4 to i64 addrspace(3)*) to i64*), align 8
|
||||
@gptr.3 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* @lds.3 to i64*), align 8
|
||||
@gptr.4 = addrspace(1) global i64* addrspacecast (float addrspace(3)* @lds.4 to i64*), align 8
|
||||
|
||||
; CHECK: @llvm.amdgcn.kernel.k0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k0.lds.t undef, align 4
|
||||
|
||||
; CHECK: @llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i16 addrspace(3)* @lds.5 to i8 addrspace(3)*) to i8*)], section "llvm.metadata"
|
||||
; CHECK: @llvm.compiler.used = appending global [2 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @lds.6 to i8 addrspace(3)*) to i8*)], section "llvm.metadata"
|
||||
@llvm.used = appending global [2 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i16 addrspace(3)* @lds.1 to i8 addrspace(3)*) to i8*), i8* addrspacecast (i8 addrspace(3)* bitcast (i16 addrspace(3)* @lds.5 to i8 addrspace(3)*) to i8*)], section "llvm.metadata"
|
||||
@llvm.compiler.used = appending global [3 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @lds.2 to i8 addrspace(3)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @lds.6 to i8 addrspace(3)*) to i8*)], section "llvm.metadata"
|
||||
|
||||
; CHECK-LABEL: @k0()
|
||||
; CHECK: %ld.lds.1 = load i16, i16 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, %llvm.amdgcn.kernel.k0.lds.t addrspace(3)* @llvm.amdgcn.kernel.k0.lds, i32 0, i32 1), align 2
|
||||
; CHECK: %ld.lds.2 = load i32, i32 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, %llvm.amdgcn.kernel.k0.lds.t addrspace(3)* @llvm.amdgcn.kernel.k0.lds, i32 0, i32 0), align 4
|
||||
; CHECK: %ld.lds.3 = load i64, i64 addrspace(3)* @lds.3, align 4
|
||||
; CHECK: %ld.lds.4 = load float, float addrspace(3)* @lds.4, align 4
|
||||
; CHECK: ret void
|
||||
define amdgpu_kernel void @k0() {
|
||||
%ld.lds.1 = load i16, i16 addrspace(3)* @lds.1
|
||||
%ld.lds.2 = load i32, i32 addrspace(3)* @lds.2
|
||||
%ld.lds.3 = load i64, i64 addrspace(3)* @lds.3
|
||||
%ld.lds.4 = load float, float addrspace(3)* @lds.4
|
||||
ret void
|
||||
}
|
93
test/CodeGen/AMDGPU/lower-module-lds-global-alias.ll
Normal file
93
test/CodeGen/AMDGPU/lower-module-lds-global-alias.ll
Normal file
@ -0,0 +1,93 @@
|
||||
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | FileCheck %s
|
||||
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s
|
||||
|
||||
;.
|
||||
; @lds.1: is aliased with @alias.to.lds.1, and @alias.to.lds.1 is used within kernel @k0.
|
||||
; Hence, @lds.1 is lowered.
|
||||
; @lds.2: is aliased with @alias.to.lds.2, and @alias.to.lds.2 is used within non-kernel @f0,
|
||||
; Hence, @lds.2 is lowered.
|
||||
; @lds.3: is used as initializer to global @gptr.3, and @gptr.3 is aliased with @alias.to.gptr.3,
|
||||
; and @alias.to.gptr.3 is used within kernel @k1. Hence, @lds.3 is lowered.
|
||||
; @lds.4: is used as initializer to global @gptr.4, and @gptr.4 is aliased with @alias.to.gptr.4,
|
||||
; and @alias.to.gptr.4 is used within non-kernel @f1. Hence, @lds.4 is lowered.
|
||||
; @lds.5: is aliased with @alias.to.lds.5, but neither @lds.5 nor @alias.to.lds.5 is used anywhere.
|
||||
; Hence, @lds.5 is not lowered.
|
||||
; @lds.6: is used as initializer to global @gptr.6, and @gptr.6 is aliased with @alias.to.gptr.6.
|
||||
; But none of them are used anywhere. Hence, @lds.6 is not lowered.
|
||||
;.
|
||||
|
||||
; CHECK: %llvm.amdgcn.module.lds.t = type { [4 x i8], [3 x i8], [1 x i8], [2 x i8], [1 x i8] }
|
||||
|
||||
; CHECK-NOT: @lds.1
|
||||
; CHECK-NOT: @lds.2
|
||||
; CHECK-NOT: @lds.3
|
||||
; CHECK-NOT: @lds.4
|
||||
; CHECK: @lds.5 = internal unnamed_addr addrspace(3) global [5 x i8] undef, align 8
|
||||
; CHECK: @lds.6 = internal unnamed_addr addrspace(3) global [6 x i8] undef, align 8
|
||||
@lds.1 = internal unnamed_addr addrspace(3) global [1 x i8] undef, align 1
|
||||
@lds.2 = internal unnamed_addr addrspace(3) global [2 x i8] undef, align 2
|
||||
@lds.3 = internal unnamed_addr addrspace(3) global [3 x i8] undef, align 4
|
||||
@lds.4 = internal unnamed_addr addrspace(3) global [4 x i8] undef, align 4
|
||||
@lds.5 = internal unnamed_addr addrspace(3) global [5 x i8] undef, align 8
|
||||
@lds.6 = internal unnamed_addr addrspace(3) global [6 x i8] undef, align 8
|
||||
|
||||
; CHECK: @gptr.3 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast ([3 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 1) to i64 addrspace(3)*) to i64*), align 8
|
||||
; CHECK: @gptr.4 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast (%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds to i64 addrspace(3)*) to i64*), align 8
|
||||
; CHECK: @gptr.6 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast ([6 x i8] addrspace(3)* @lds.6 to i64 addrspace(3)*) to i64*), align 8
|
||||
@gptr.3 = addrspace(1) global i64* addrspacecast ([3 x i8] addrspace(3)* @lds.3 to i64*), align 8
|
||||
@gptr.4 = addrspace(1) global i64* addrspacecast ([4 x i8] addrspace(3)* @lds.4 to i64*), align 8
|
||||
@gptr.6 = addrspace(1) global i64* addrspacecast ([6 x i8] addrspace(3)* @lds.6 to i64*), align 8
|
||||
|
||||
; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 4
|
||||
; CHECK: @llvm.compiler.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 0, i32 0) to i8*)], section "llvm.metadata"
|
||||
|
||||
; CHECK: @alias.to.lds.1 = alias [1 x i8], getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 4)
|
||||
; CHECK: @alias.to.lds.2 = alias [2 x i8], getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 3)
|
||||
; CHECK: @alias.to.gptr.3 = alias i64*, i64* addrspace(1)* @gptr.3
|
||||
; CHECK: @alias.to.gptr.4 = alias i64*, i64* addrspace(1)* @gptr.4
|
||||
; CHECK: @alias.to.lds.5 = alias [5 x i8], [5 x i8] addrspace(3)* @lds.5
|
||||
; CHECK: @alias.to.gptr.6 = alias i64*, i64* addrspace(1)* @gptr.6
|
||||
@alias.to.lds.1 = alias [1 x i8], [1 x i8] addrspace(3)* @lds.1
|
||||
@alias.to.lds.2 = alias [2 x i8], [2 x i8] addrspace(3)* @lds.2
|
||||
@alias.to.gptr.3 = alias i64*, i64* addrspace(1)* @gptr.3
|
||||
@alias.to.gptr.4 = alias i64*, i64* addrspace(1)* @gptr.4
|
||||
@alias.to.lds.5 = alias [5 x i8], [5 x i8] addrspace(3)* @lds.5
|
||||
@alias.to.gptr.6 = alias i64*, i64* addrspace(1)* @gptr.6
|
||||
|
||||
; CHECK-LABEL: @f1
|
||||
; CHECK: %ld = load i64*, i64* addrspace(1)* @alias.to.gptr.4, align 8
|
||||
; CHECK: ret void
|
||||
define void @f1() {
|
||||
%ld = load i64*, i64* addrspace(1)* @alias.to.gptr.4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @f0
|
||||
; CHECK: %bc = bitcast [2 x i8] addrspace(3)* @alias.to.lds.2 to i8 addrspace(3)*
|
||||
; CHECK: store i8 1, i8 addrspace(3)* %bc, align 2
|
||||
; CHECK: ret void
|
||||
define void @f0() {
|
||||
%bc = bitcast [2 x i8] addrspace(3)* @alias.to.lds.2 to i8 addrspace(3)*
|
||||
store i8 1, i8 addrspace(3)* %bc, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
; Only the function name is unique enough to serve as a label; the body lines
; are matched with ordinary checks so they stay scoped to this function.
; CHECK-LABEL: @k1
; CHECK: call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds) ]
; CHECK: %ld = load i64*, i64* addrspace(1)* @alias.to.gptr.3, align 8
; CHECK: ret void
define amdgpu_kernel void @k1() {
  %ld = load i64*, i64* addrspace(1)* @alias.to.gptr.3
  ret void
}
|
||||
|
||||
; Only the function name is unique enough to serve as a label; the body lines
; are matched with ordinary checks so they stay scoped to this function.
; CHECK-LABEL: @k0
; CHECK: call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds) ]
; CHECK: %bc = bitcast [1 x i8] addrspace(3)* @alias.to.lds.1 to i8 addrspace(3)*
; CHECK: store i8 1, i8 addrspace(3)* %bc, align 1
; CHECK: ret void
define amdgpu_kernel void @k0() {
  %bc = bitcast [1 x i8] addrspace(3)* @alias.to.lds.1 to i8 addrspace(3)*
  store i8 1, i8 addrspace(3)* %bc, align 1
  ret void
}
|
88
test/CodeGen/AMDGPU/lower-module-lds-global-uses.ll
Normal file
88
test/CodeGen/AMDGPU/lower-module-lds-global-uses.ll
Normal file
@ -0,0 +1,88 @@
|
||||
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | FileCheck %s
|
||||
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s
|
||||
|
||||
;.
|
||||
; @lds.1: is part of @llvm.used list, and is not used anywhere else. Hence it is not lowered.
|
||||
; @lds.2: is part of @llvm.compiler.used list, and is not used anywhere else. Hence it is not lowered.
|
||||
; @lds.3: is used as initializer to @gptr.3, and is not used anywhere else. @gptr.3 itself is also not
|
||||
; used anywhere else, hence @lds.3 is not lowered.
|
||||
; @lds.4: is used as initializer to @gptr.4, and is not used anywhere else. @gptr.4 is part of
|
||||
; @llvm.compiler.used list, but is not used anywhere else. Hence @lds.4 is not lowered.
|
||||
;
|
||||
; @lds.5: is used as initializer to @gptr.5, and is not used anywhere else. @gptr.5 is part of
|
||||
; @llvm.compiler.used list, but is also used within kernel @k0. Hence @lds.5 is lowered.
|
||||
; @lds.6: is used as initializer to @gptr.6, and is not used anywhere else. @gptr.6 is part of
|
||||
; @llvm.compiler.used list, but is also used within non-kernel function @f0. Hence @lds.6 is lowered.
|
||||
; @lds.7: is used as initializer to @gptr.7, and is not used anywhere else. @gptr.7 is used as initializer to @gptr.8,
|
||||
; and @gptr.8 is used within non-kernel function @f1. Hence @lds.7 is lowered.
|
||||
;.
|
||||
|
||||
; CHECK: %llvm.amdgcn.module.lds.t = type { [3 x float], [4 x i8], [2 x float], [1 x float] }
|
||||
|
||||
; CHECK: @lds.1 = addrspace(3) global i16 undef, align 2
|
||||
; CHECK: @lds.2 = addrspace(3) global i32 undef, align 4
|
||||
; CHECK: @lds.3 = addrspace(3) global i64 undef, align 8
|
||||
; CHECK: @lds.4 = addrspace(3) global float undef, align 4
|
||||
; CHECK-NOT: @lds.5
|
||||
; CHECK-NOT: @lds.6
|
||||
; CHECK-NOT: @lds.7
|
||||
@lds.1 = addrspace(3) global i16 undef, align 2
|
||||
@lds.2 = addrspace(3) global i32 undef, align 4
|
||||
@lds.3 = addrspace(3) global i64 undef, align 8
|
||||
@lds.4 = addrspace(3) global float undef, align 4
|
||||
@lds.5 = addrspace(3) global [1 x float] undef, align 4
|
||||
@lds.6 = addrspace(3) global [2 x float] undef, align 8
|
||||
@lds.7 = addrspace(3) global [3 x float] undef, align 16
|
||||
|
||||
; CHECK: @gptr.3 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* @lds.3 to i64*), align 8
|
||||
; CHECK: @gptr.4 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast (float addrspace(3)* @lds.4 to i64 addrspace(3)*) to i64*), align 8
|
||||
; CHECK: @gptr.5 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast ([1 x float] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 3) to i64 addrspace(3)*) to i64*), align 8
|
||||
; CHECK: @gptr.6 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast ([2 x float] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 2) to i64 addrspace(3)*) to i64*), align 8
|
||||
; CHECK: @gptr.7 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast (%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds to i64 addrspace(3)*) to i64*), align 8
|
||||
; CHECK: @gptr.8 = addrspace(1) global i64** addrspacecast (i64* addrspace(1)* @gptr.7 to i64**), align 8
|
||||
@gptr.3 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* @lds.3 to i64*), align 8
|
||||
@gptr.4 = addrspace(1) global i64* addrspacecast (float addrspace(3)* @lds.4 to i64*), align 8
|
||||
@gptr.5 = addrspace(1) global i64* addrspacecast ([1 x float] addrspace(3)* @lds.5 to i64*), align 8
|
||||
@gptr.6 = addrspace(1) global i64* addrspacecast ([2 x float] addrspace(3)* @lds.6 to i64*), align 8
|
||||
@gptr.7 = addrspace(1) global i64* addrspacecast ([3 x float] addrspace(3)* @lds.7 to i64*), align 8
|
||||
@gptr.8 = addrspace(1) global i64** addrspacecast (i64* addrspace(1)* @gptr.7 to i64**), align 8
|
||||
|
||||
; CHECK: @llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i16 addrspace(3)* @lds.1 to i8 addrspace(3)*) to i8*)], section "llvm.metadata"
|
||||
; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 16
|
||||
; CHECK: @llvm.compiler.used = appending global [5 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @lds.2 to i8 addrspace(3)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(3)* bitcast (%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds to i8 addrspace(3)*) to i8*)], section "llvm.metadata"
|
||||
@llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i16 addrspace(3)* @lds.1 to i8 addrspace(3)*) to i8*)], section "llvm.metadata"
|
||||
@llvm.compiler.used = appending global [4 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @lds.2 to i8 addrspace(3)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
|
||||
|
||||
; CHECK-LABEL: @f1()
|
||||
; CHECK: %ld = load i64**, i64** addrspace(1)* @gptr.8, align 8
|
||||
; CHECK: ret void
|
||||
define void @f1() {
|
||||
%ld = load i64**, i64** addrspace(1)* @gptr.8
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @f0()
|
||||
; CHECK: %ld = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i32 addrspace(1)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32
|
||||
; CHECK: addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i32 addrspace(1)*) to i32*) to i64)) to i32*), align 4
|
||||
; CHECK: ret void
|
||||
define void @f0() {
|
||||
%ld = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i32 addrspace(1)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i32 addrspace(1)*) to i32*) to i64)) to i32*), align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @k0()
|
||||
; CHECK: call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds) ]
|
||||
; CHECK: %ld = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i32 addrspace(1)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32
|
||||
; CHECK: addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i32 addrspace(1)*) to i32*) to i64)) to i32*), align 4
|
||||
; CHECK: ret void
|
||||
define amdgpu_kernel void @k0() {
|
||||
%ld = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i32 addrspace(1)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i32 addrspace(1)*) to i32*) to i64)) to i32*), align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @k1()
|
||||
; CHECK: call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds) ]
|
||||
; CHECK: ret void
|
||||
define amdgpu_kernel void @k1() {
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue
Block a user