mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 02:33:06 +01:00
[OpenMP] Change OpenMPOpt to check openmp metadata
The metadata added in D102361 introduces a module flag that we can check to determine if the module was compiled with `-fopenmp` enabled. We can now check for the presence of this flag instead of scanning the call graph for OpenMP runtime functions. Depends on D102361. Reviewed By: jdoerfert. Differential Revision: https://reviews.llvm.org/D102423
This commit is contained in:
parent
87c4e2706e
commit
b8d800fd9c
@ -20,60 +20,27 @@ namespace omp {
|
||||
/// Summary of a kernel (=entry point for target offloading).
|
||||
using Kernel = Function *;
|
||||
|
||||
/// Helper to remember if the module contains OpenMP (runtime calls), to be used
|
||||
/// foremost with containsOpenMP.
|
||||
struct OpenMPInModule {
|
||||
OpenMPInModule &operator=(bool Found) {
|
||||
if (Found)
|
||||
Value = OpenMPInModule::OpenMP::FOUND;
|
||||
else
|
||||
Value = OpenMPInModule::OpenMP::NOT_FOUND;
|
||||
return *this;
|
||||
}
|
||||
bool isKnown() { return Value != OpenMP::UNKNOWN; }
|
||||
operator bool() { return Value != OpenMP::NOT_FOUND; }
|
||||
/// Set of kernels in the module
|
||||
using KernelSet = SmallPtrSet<Kernel, 4>;
|
||||
|
||||
/// Does this function \p F contain any OpenMP runtime calls?
|
||||
bool containsOMPRuntimeCalls(Function *F) const {
|
||||
return FuncsWithOMPRuntimeCalls.contains(F);
|
||||
}
|
||||
/// Helper to determine if \p M contains OpenMP.
|
||||
bool containsOpenMP(Module &M);
|
||||
|
||||
/// Return the known kernels (=GPU entry points) in the module.
|
||||
SmallPtrSetImpl<Kernel> &getKernels() { return Kernels; }
|
||||
/// Helper to determine if \p M is an OpenMP target offloading device module.
|
||||
bool isOpenMPDevice(Module &M);
|
||||
|
||||
/// Identify kernels in the module and populate the Kernels set.
|
||||
void identifyKernels(Module &M);
|
||||
|
||||
private:
|
||||
enum class OpenMP { FOUND, NOT_FOUND, UNKNOWN } Value = OpenMP::UNKNOWN;
|
||||
|
||||
friend bool containsOpenMP(Module &M, OpenMPInModule &OMPInModule);
|
||||
|
||||
/// In which functions are OpenMP runtime calls present?
|
||||
SmallPtrSet<Function *, 32> FuncsWithOMPRuntimeCalls;
|
||||
|
||||
/// Collection of known kernels (=GPU entry points) in the module.
|
||||
SmallPtrSet<Kernel, 8> Kernels;
|
||||
};
|
||||
|
||||
/// Helper to determine if \p M contains OpenMP (runtime calls).
|
||||
bool containsOpenMP(Module &M, OpenMPInModule &OMPInModule);
|
||||
/// Get OpenMP device kernels in \p M.
|
||||
KernelSet getDeviceKernels(Module &M);
|
||||
|
||||
} // namespace omp
|
||||
|
||||
/// OpenMP optimizations pass.
|
||||
class OpenMPOptPass : public PassInfoMixin<OpenMPOptPass> {
|
||||
/// Helper to remember if the module contains OpenMP (runtime calls).
|
||||
omp::OpenMPInModule OMPInModule;
|
||||
|
||||
public:
|
||||
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
|
||||
};
|
||||
|
||||
class OpenMPOptCGSCCPass : public PassInfoMixin<OpenMPOptCGSCCPass> {
|
||||
/// Helper to remember if the module contains OpenMP (runtime calls).
|
||||
omp::OpenMPInModule OMPInModule;
|
||||
|
||||
public:
|
||||
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
|
||||
LazyCallGraph &CG, CGSCCUpdateResult &UR);
|
||||
|
@ -1629,7 +1629,7 @@ private:
|
||||
for (auto *F : SCC) {
|
||||
if (!F->isDeclaration())
|
||||
A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(*F));
|
||||
if (!OMPInfoCache.Kernels.empty())
|
||||
if (isOpenMPDevice(M))
|
||||
A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(*F));
|
||||
}
|
||||
}
|
||||
@ -2629,17 +2629,18 @@ AAHeapToShared &AAHeapToShared::createForPosition(const IRPosition &IRP,
|
||||
}
|
||||
|
||||
PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
|
||||
if (!containsOpenMP(M, OMPInModule))
|
||||
if (!containsOpenMP(M))
|
||||
return PreservedAnalyses::all();
|
||||
|
||||
if (DisableOpenMPOptimizations)
|
||||
return PreservedAnalyses::all();
|
||||
|
||||
KernelSet Kernels = getDeviceKernels(M);
|
||||
|
||||
// Create internal copies of each function if this is a kernel Module.
|
||||
DenseSet<const Function *> InternalizedFuncs;
|
||||
if (!OMPInModule.getKernels().empty())
|
||||
if (isOpenMPDevice(M))
|
||||
for (Function &F : M)
|
||||
if (!F.isDeclaration() && !OMPInModule.getKernels().contains(&F))
|
||||
if (!F.isDeclaration() && !Kernels.contains(&F))
|
||||
if (Attributor::internalizeFunction(F, /* Force */ true))
|
||||
InternalizedFuncs.insert(&F);
|
||||
|
||||
@ -2665,10 +2666,9 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
|
||||
CallGraphUpdater CGUpdater;
|
||||
|
||||
SetVector<Function *> Functions(SCC.begin(), SCC.end());
|
||||
OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions,
|
||||
OMPInModule.getKernels());
|
||||
OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, Kernels);
|
||||
|
||||
unsigned MaxFixponitIterations = (!OMPInModule.getKernels().empty()) ? 64 : 32;
|
||||
unsigned MaxFixponitIterations = (Kernels.empty()) ? 64 : 32;
|
||||
Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false, MaxFixponitIterations, OREGetter,
|
||||
DEBUG_TYPE);
|
||||
|
||||
@ -2684,30 +2684,25 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
|
||||
CGSCCAnalysisManager &AM,
|
||||
LazyCallGraph &CG,
|
||||
CGSCCUpdateResult &UR) {
|
||||
if (!containsOpenMP(*C.begin()->getFunction().getParent(), OMPInModule))
|
||||
if (!containsOpenMP(*C.begin()->getFunction().getParent()))
|
||||
return PreservedAnalyses::all();
|
||||
|
||||
if (DisableOpenMPOptimizations)
|
||||
return PreservedAnalyses::all();
|
||||
|
||||
SmallVector<Function *, 16> SCC;
|
||||
// If there are kernels in the module, we have to run on all SCC's.
|
||||
bool SCCIsInteresting = !OMPInModule.getKernels().empty();
|
||||
for (LazyCallGraph::Node &N : C) {
|
||||
Function *Fn = &N.getFunction();
|
||||
SCC.push_back(Fn);
|
||||
|
||||
// Do we already know that the SCC contains kernels,
|
||||
// or that OpenMP functions are called from this SCC?
|
||||
if (SCCIsInteresting)
|
||||
continue;
|
||||
// If not, let's check that.
|
||||
SCCIsInteresting |= OMPInModule.containsOMPRuntimeCalls(Fn);
|
||||
}
|
||||
|
||||
if (!SCCIsInteresting || SCC.empty())
|
||||
if (SCC.empty())
|
||||
return PreservedAnalyses::all();
|
||||
|
||||
Module &M = *C.begin()->getFunction().getParent();
|
||||
|
||||
KernelSet Kernels = getDeviceKernels(M);
|
||||
|
||||
FunctionAnalysisManager &FAM =
|
||||
AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
|
||||
|
||||
@ -2723,9 +2718,9 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
|
||||
|
||||
SetVector<Function *> Functions(SCC.begin(), SCC.end());
|
||||
OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
|
||||
/*CGSCC*/ Functions, OMPInModule.getKernels());
|
||||
/*CGSCC*/ Functions, Kernels);
|
||||
|
||||
unsigned MaxFixponitIterations = (!OMPInModule.getKernels().empty()) ? 64 : 32;
|
||||
unsigned MaxFixponitIterations = (isOpenMPDevice(M)) ? 64 : 32;
|
||||
Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, MaxFixponitIterations, OREGetter,
|
||||
DEBUG_TYPE);
|
||||
|
||||
@ -2741,7 +2736,6 @@ namespace {
|
||||
|
||||
struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
|
||||
CallGraphUpdater CGUpdater;
|
||||
OpenMPInModule OMPInModule;
|
||||
static char ID;
|
||||
|
||||
OpenMPOptCGSCCLegacyPass() : CallGraphSCCPass(ID) {
|
||||
@ -2752,38 +2746,27 @@ struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
|
||||
CallGraphSCCPass::getAnalysisUsage(AU);
|
||||
}
|
||||
|
||||
bool doInitialization(CallGraph &CG) override {
|
||||
// Disable the pass if there is no OpenMP (runtime call) in the module.
|
||||
containsOpenMP(CG.getModule(), OMPInModule);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool runOnSCC(CallGraphSCC &CGSCC) override {
|
||||
if (!containsOpenMP(CGSCC.getCallGraph().getModule(), OMPInModule))
|
||||
if (!containsOpenMP(CGSCC.getCallGraph().getModule()))
|
||||
return false;
|
||||
if (DisableOpenMPOptimizations || skipSCC(CGSCC))
|
||||
return false;
|
||||
|
||||
SmallVector<Function *, 16> SCC;
|
||||
// If there are kernels in the module, we have to run on all SCC's.
|
||||
bool SCCIsInteresting = !OMPInModule.getKernels().empty();
|
||||
for (CallGraphNode *CGN : CGSCC) {
|
||||
Function *Fn = CGN->getFunction();
|
||||
if (!Fn || Fn->isDeclaration())
|
||||
continue;
|
||||
SCC.push_back(Fn);
|
||||
|
||||
// Do we already know that the SCC contains kernels,
|
||||
// or that OpenMP functions are called from this SCC?
|
||||
if (SCCIsInteresting)
|
||||
continue;
|
||||
// If not, let's check that.
|
||||
SCCIsInteresting |= OMPInModule.containsOMPRuntimeCalls(Fn);
|
||||
}
|
||||
|
||||
if (!SCCIsInteresting || SCC.empty())
|
||||
if (SCC.empty())
|
||||
return false;
|
||||
|
||||
Module &M = CGSCC.getCallGraph().getModule();
|
||||
KernelSet Kernels = getDeviceKernels(M);
|
||||
|
||||
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
|
||||
CGUpdater.initialize(CG, CGSCC);
|
||||
|
||||
@ -2799,11 +2782,11 @@ struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
|
||||
AnalysisGetter AG;
|
||||
SetVector<Function *> Functions(SCC.begin(), SCC.end());
|
||||
BumpPtrAllocator Allocator;
|
||||
OMPInformationCache InfoCache(
|
||||
*(Functions.back()->getParent()), AG, Allocator,
|
||||
/*CGSCC*/ Functions, OMPInModule.getKernels());
|
||||
OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG,
|
||||
Allocator,
|
||||
/*CGSCC*/ Functions, Kernels);
|
||||
|
||||
unsigned MaxFixponitIterations = (!OMPInModule.getKernels().empty()) ? 64 : 32;
|
||||
unsigned MaxFixponitIterations = (isOpenMPDevice(M)) ? 64 : 32;
|
||||
Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
|
||||
MaxFixponitIterations, OREGetter, DEBUG_TYPE);
|
||||
|
||||
@ -2816,11 +2799,13 @@ struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
void OpenMPInModule::identifyKernels(Module &M) {
|
||||
|
||||
KernelSet llvm::omp::getDeviceKernels(Module &M) {
|
||||
// TODO: Create a more cross-platform way of determining device kernels.
|
||||
NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
|
||||
KernelSet Kernels;
|
||||
|
||||
if (!MD)
|
||||
return;
|
||||
return Kernels;
|
||||
|
||||
for (auto *Op : MD->operands()) {
|
||||
if (Op->getNumOperands() < 2)
|
||||
@ -2838,38 +2823,24 @@ void OpenMPInModule::identifyKernels(Module &M) {
|
||||
|
||||
Kernels.insert(KernelFn);
|
||||
}
|
||||
|
||||
return Kernels;
|
||||
}
|
||||
|
||||
bool llvm::omp::containsOpenMP(Module &M, OpenMPInModule &OMPInModule) {
|
||||
if (OMPInModule.isKnown())
|
||||
return OMPInModule;
|
||||
bool llvm::omp::containsOpenMP(Module &M) {
|
||||
Metadata *MD = M.getModuleFlag("openmp");
|
||||
if (!MD)
|
||||
return false;
|
||||
|
||||
auto RecordFunctionsContainingUsesOf = [&](Function *F) {
|
||||
for (User *U : F->users())
|
||||
if (auto *I = dyn_cast<Instruction>(U))
|
||||
OMPInModule.FuncsWithOMPRuntimeCalls.insert(I->getFunction());
|
||||
};
|
||||
return true;
|
||||
}
|
||||
|
||||
// MSVC doesn't like long if-else chains for some reason and instead just
|
||||
// issues an error. Work around it..
|
||||
do {
|
||||
#define OMP_RTL(_Enum, _Name, ...) \
|
||||
if (Function *F = M.getFunction(_Name)) { \
|
||||
RecordFunctionsContainingUsesOf(F); \
|
||||
OMPInModule = true; \
|
||||
}
|
||||
#include "llvm/Frontend/OpenMP/OMPKinds.def"
|
||||
} while (false);
|
||||
bool llvm::omp::isOpenMPDevice(Module &M) {
|
||||
Metadata *MD = M.getModuleFlag("openmp-device");
|
||||
if (!MD)
|
||||
return false;
|
||||
|
||||
// Identify kernels once. TODO: We should split the OMPInformationCache into a
|
||||
// module and an SCC part. The kernel information, among other things, could
|
||||
// go into the module part.
|
||||
if (OMPInModule.isKnown() && OMPInModule) {
|
||||
OMPInModule.identifyKernels(M);
|
||||
return true;
|
||||
}
|
||||
|
||||
return OMPInModule = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
char OpenMPOptCGSCCLegacyPass::ID = 0;
|
||||
|
@ -1739,3 +1739,6 @@ attributes #0 = { noinline }
|
||||
; OPTIMISTIC: ; Function Attrs: convergent noinline nounwind
|
||||
; OPTIMISTIC-NEXT: declare void @__kmpc_barrier_simple_spmd(%struct.ident_t* nocapture nofree readonly, i32)
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
|
||||
!0 = !{i32 7, !"openmp", i32 50}
|
||||
|
@ -26,3 +26,7 @@ declare void @__kmpc_syncwarp(i64)
|
||||
|
||||
; OPTIMISTIC: ; Function Attrs: convergent nounwind
|
||||
; OPTIMISTIC-NEXT: declare void @__kmpc_syncwarp(i64)
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
|
||||
!0 = !{i32 7, !"openmp", i32 50}
|
||||
|
@ -221,3 +221,7 @@ entry:
|
||||
call void @useI32(i32 %tid5)
|
||||
ret void
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
|
||||
!0 = !{i32 7, !"openmp", i32 50}
|
||||
|
@ -30,7 +30,7 @@ declare !dbg !4 void @useI32(i32) local_unnamed_addr
|
||||
declare void @llvm.dbg.value(metadata, metadata, metadata)
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!8, !9, !10, !11, !12}
|
||||
!llvm.module.flags = !{!8, !9, !10, !11, !12, !29}
|
||||
!llvm.ident = !{!13}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 10.0.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, splitDebugInlining: false, nameTableKind: None)
|
||||
@ -62,3 +62,4 @@ declare void @llvm.dbg.value(metadata, metadata, metadata)
|
||||
!26 = !DILocation(line: 9, column: 10, scope: !14)
|
||||
!27 = !DILocation(line: 10, column: 2, scope: !14)
|
||||
!28 = !DILocation(line: 13, column: 1, scope: !14)
|
||||
!29 = !{i32 7, !"openmp", i32 50}
|
||||
|
@ -4,13 +4,14 @@ source_filename = "declare_target_codegen_globalization.cpp"
|
||||
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
|
||||
target triple = "nvptx64"
|
||||
|
||||
; CHECK: remark: globalization_remarks.c:5:7: Could not move globalized variable to the stack. Variable is potentially captured.
|
||||
; CHECK: remark: globalization_remarks.c:5:7: Found thread data sharing on the GPU. Expect degraded performance due to data globalization.
|
||||
|
||||
@S = external local_unnamed_addr global i8*
|
||||
|
||||
define void @foo() {
|
||||
entry:
|
||||
%0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !8
|
||||
%0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !10
|
||||
%x_on_stack = bitcast i8* %0 to i32*
|
||||
%1 = bitcast i32* %x_on_stack to i8*
|
||||
call void @share(i8* %1)
|
||||
@ -30,13 +31,17 @@ declare void @__kmpc_free_shared(i8*)
|
||||
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!3, !4}
|
||||
!llvm.module.flags = !{!3, !4, !5, !6}
|
||||
!nvvm.annotations = !{!7, !8}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
|
||||
!1 = !DIFile(filename: "globalization_remarks.c", directory: "/tmp/globalization_remarks.c")
|
||||
!2 = !{}
|
||||
!3 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!4 = !{i32 1, !"wchar_size", i32 4}
|
||||
!6 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !7, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
|
||||
!7 = !DISubroutineType(types: !2)
|
||||
!8 = !DILocation(line: 5, column: 7, scope: !6)
|
||||
!5 = !{i32 7, !"openmp", i32 50}
|
||||
!6 = !{i32 7, !"openmp-device", i32 50}
|
||||
!7 = !{void ()* @foo, !"kernel", i32 1}
|
||||
!8 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !9, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
|
||||
!9 = !DISubroutineType(types: !2)
|
||||
!10 = !DILocation(line: 5, column: 7, scope: !8)
|
||||
|
@ -19,9 +19,11 @@ define void @non_kernel() {
|
||||
; Needed to trigger the openmp-opt pass
|
||||
declare dso_local void @__kmpc_kernel_prepare_parallel(i8*)
|
||||
|
||||
!llvm.module.flags = !{!4}
|
||||
!nvvm.annotations = !{!2, !0, !1, !3, !1, !2}
|
||||
|
||||
!0 = !{void ()* @kernel1, !"kernel", i32 1}
|
||||
!1 = !{void ()* @non_kernel, !"non_kernel", i32 1}
|
||||
!2 = !{null, !"align", i32 1}
|
||||
!3 = !{void ()* @kernel2, !"kernel", i32 1}
|
||||
!4 = !{i32 7, !"openmp", i32 50}
|
||||
|
@ -275,5 +275,8 @@ declare void @__kmpc_kernel_end_parallel()
|
||||
|
||||
|
||||
!nvvm.annotations = !{!1}
|
||||
!llvm.module.flags = !{!2, !3}
|
||||
|
||||
!1 = !{void ()* @__omp_offloading_50_6dfa0f01_foo_l6, !"kernel", i32 1}
|
||||
!2 = !{i32 7, !"openmp", i32 50}
|
||||
!3 = !{i32 7, !"openmp-device", i32 50}
|
||||
|
@ -522,3 +522,7 @@ declare dso_local i32 @rand(...)
|
||||
|
||||
; CHECK: declare void @__tgt_target_data_begin_mapper_issue(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**, %struct.__tgt_async_info*)
|
||||
; CHECK: declare void @__tgt_target_data_begin_mapper_wait(i64, %struct.__tgt_async_info*)
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
|
||||
!0 = !{i32 7, !"openmp", i32 50}
|
||||
|
@ -67,7 +67,7 @@ attributes #4 = { argmemonly nounwind willreturn }
|
||||
attributes #5 = { nounwind readnone speculatable willreturn }
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!13, !14, !15}
|
||||
!llvm.module.flags = !{!13, !14, !15, !59}
|
||||
!llvm.ident = !{!16}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 11.0.0 (https://github.com/llvm/llvm-project.git 73cea83a6f5ab521edf3cccfc603534776d691ec)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, splitDebugInlining: false, nameTableKind: None)
|
||||
@ -129,3 +129,4 @@ attributes #5 = { nounwind readnone speculatable willreturn }
|
||||
!56 = !DILocation(line: 18, column: 1, scope: !33)
|
||||
!57 = !{!58}
|
||||
!58 = !{i64 2, i64 -1, i64 -1, i1 true}
|
||||
!59 = !{i32 7, !"openmp", i32 50}
|
||||
|
@ -675,5 +675,8 @@ define i32 @test6(i32 %0) {
|
||||
|
||||
declare i32 @__gxx_personality_v0(...)
|
||||
|
||||
!llvm.module.flags = !{!2}
|
||||
|
||||
!0 = !{!1}
|
||||
!1 = !{i64 2, i64 -1, i64 -1, i1 true}
|
||||
!2 = !{i32 7, !"openmp", i32 50}
|
||||
|
@ -739,6 +739,8 @@ declare void @readonly() readonly
|
||||
|
||||
declare void @readnone() readnone
|
||||
|
||||
!llvm.module.flags = !{!8}
|
||||
|
||||
!0 = !{i32 1, !"wchar_size", i32 4}
|
||||
!1 = !{!"clang"}
|
||||
!2 = !{!3}
|
||||
@ -747,3 +749,4 @@ declare void @readnone() readnone
|
||||
!5 = !{!"int", !6, i64 0}
|
||||
!6 = !{!"omnipotent char", !7, i64 0}
|
||||
!7 = !{!"Simple C/C++ TBAA"}
|
||||
!8 = !{i32 7, !"openmp", i32 50}
|
||||
|
@ -69,7 +69,7 @@ attributes #1 = { readonly willreturn }
|
||||
attributes #2 = { readnone willreturn }
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!9, !10, !11, !12, !13}
|
||||
!llvm.module.flags = !{!9, !10, !11, !12, !13, !52}
|
||||
!llvm.ident = !{!14}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 10.0.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, splitDebugInlining: false, nameTableKind: None)
|
||||
@ -124,3 +124,4 @@ attributes #2 = { readnone willreturn }
|
||||
!49 = !DILocalVariable(name: ".global_tid.", arg: 1, scope: !47, type: !28, flags: DIFlagArtificial)
|
||||
!50 = !DILocalVariable(name: ".bound_tid.", arg: 2, scope: !47, type: !28, flags: DIFlagArtificial)
|
||||
!51 = !DILocation(line: 15, column: 2, scope: !47)
|
||||
!52 = !{i32 7, !"openmp", i32 50}
|
||||
|
@ -782,11 +782,12 @@ entry:
|
||||
}
|
||||
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!llvm.module.flags = !{!0, !3}
|
||||
|
||||
!0 = !{i32 1, !"wchar_size", i32 4}
|
||||
!1 = !{!2}
|
||||
!2 = !{i64 2, i64 -1, i64 -1, i1 true}
|
||||
!3 = !{i32 7, !"openmp", i32 50}
|
||||
; CHECK-LABEL: define {{[^@]+}}@merge
|
||||
; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
|
||||
; CHECK-NEXT: entry:
|
||||
@ -6908,3 +6909,4 @@ entry:
|
||||
; CHECK2-NEXT: call void @use(i32 [[TMP0]])
|
||||
; CHECK2-NEXT: ret void
|
||||
;
|
||||
|
||||
|
@ -30,7 +30,7 @@ define internal void @foo() {
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !9
|
||||
%0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !11
|
||||
call void @use(i8* %0)
|
||||
call void @__kmpc_free_shared(i8* %0)
|
||||
ret void
|
||||
@ -46,7 +46,7 @@ define internal void @bar() {
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !10
|
||||
%0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !12
|
||||
call void @share(i8* %0)
|
||||
call void @__kmpc_free_shared(i8* %0)
|
||||
ret void
|
||||
@ -76,7 +76,7 @@ declare i8* @__kmpc_alloc_shared(i64)
|
||||
declare void @__kmpc_free_shared(i8*)
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!3, !4}
|
||||
!llvm.module.flags = !{!3, !4, !6, !7}
|
||||
!nvvm.annotations = !{!5}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 13.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
|
||||
@ -85,8 +85,10 @@ declare void @__kmpc_free_shared(i8*)
|
||||
!3 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!4 = !{i32 1, !"wchar_size", i32 4}
|
||||
!5 = !{void ()* @kernel, !"kernel", i32 1}
|
||||
!6 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
|
||||
!7 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
|
||||
!8 = !DISubroutineType(types: !2)
|
||||
!9 = !DILocation(line: 2, column: 2, scope: !6)
|
||||
!10 = !DILocation(line: 4, column: 2, scope: !7)
|
||||
!6 = !{i32 7, !"openmp", i32 50}
|
||||
!7 = !{i32 7, !"openmp-device", i32 50}
|
||||
!8 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !10, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
|
||||
!9 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !10, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
|
||||
!10 = !DISubroutineType(types: !2)
|
||||
!11 = !DILocation(line: 2, column: 2, scope: !8)
|
||||
!12 = !DILocation(line: 4, column: 2, scope: !9)
|
||||
|
@ -35,7 +35,7 @@ entry:
|
||||
%cmp = icmp eq i32 %tid, 0
|
||||
br i1 %cmp, label %master, label %exit
|
||||
master:
|
||||
%x = call i8* @__kmpc_alloc_shared(i64 16), !dbg !9
|
||||
%x = call i8* @__kmpc_alloc_shared(i64 16), !dbg !11
|
||||
%x_on_stack = bitcast i8* %x to [4 x i32]*
|
||||
%0 = bitcast [4 x i32]* %x_on_stack to i8*
|
||||
call void @use(i8* %0)
|
||||
@ -58,7 +58,7 @@ entry:
|
||||
%3 = icmp eq i32 %tid, %master_tid
|
||||
br i1 %3, label %master, label %exit
|
||||
master:
|
||||
%y = call i8* @__kmpc_alloc_shared(i64 4), !dbg !10
|
||||
%y = call i8* @__kmpc_alloc_shared(i64 4), !dbg !12
|
||||
%y_on_stack = bitcast i8* %y to [4 x i32]*
|
||||
%4 = bitcast [4 x i32]* %y_on_stack to i8*
|
||||
call void @use(i8* %4)
|
||||
@ -87,18 +87,19 @@ declare i32 @llvm.nvvm.read.ptx.sreg.warpsize()
|
||||
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!3, !4}
|
||||
!nvvm.annotations = !{!5, !6}
|
||||
|
||||
!llvm.module.flags = !{!3, !4, !5, !6}
|
||||
!nvvm.annotations = !{!7, !8}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
|
||||
!1 = !DIFile(filename: "replace_globalization.c", directory: "/tmp/replace_globalization.c")
|
||||
!2 = !{}
|
||||
!3 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!4 = !{i32 1, !"wchar_size", i32 4}
|
||||
!5 = !{void ()* @foo, !"kernel", i32 1}
|
||||
!6 = !{void ()* @bar, !"kernel", i32 1}
|
||||
!7 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
|
||||
!8 = !DISubroutineType(types: !2)
|
||||
!9 = !DILocation(line: 5, column: 7, scope: !7)
|
||||
!10 = !DILocation(line: 5, column: 14, scope: !7)
|
||||
!5 = !{i32 7, !"openmp", i32 50}
|
||||
!6 = !{i32 7, !"openmp-device", i32 50}
|
||||
!7 = !{void ()* @foo, !"kernel", i32 1}
|
||||
!8 = !{void ()* @bar, !"kernel", i32 1}
|
||||
!9 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !10, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
|
||||
!10 = !DISubroutineType(types: !2)
|
||||
!11 = !DILocation(line: 5, column: 7, scope: !9)
|
||||
!12 = !DILocation(line: 5, column: 14, scope: !9)
|
||||
|
@ -49,13 +49,14 @@ declare void @__kmpc_flush(%struct.ident_t*)
|
||||
; Different return type.
|
||||
declare void @omp_get_thread_num()
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!llvm.module.flags = !{!0, !4}
|
||||
!llvm.ident = !{!1}
|
||||
|
||||
!0 = !{i32 1, !"wchar_size", i32 4}
|
||||
!1 = !{!"clang"}
|
||||
!2 = !{!3}
|
||||
!3 = !{i64 2, i64 -1, i64 -1, i1 true}
|
||||
!4 = !{i32 7, !"openmp", i32 50}
|
||||
|
||||
; NPM: Running pass: OpenMPOptCGSCCPass on (.omp_outlined.)
|
||||
; NPM-NOT: Running pass: OpenMPOptCGSCCPass on (.omp_outlined.)
|
||||
|
@ -68,8 +68,8 @@ declare i32 @llvm.amdgcn.workitem.id.x()
|
||||
declare void @__kmpc_kernel_init(i32, i16)
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!3, !4}
|
||||
!nvvm.annotations = !{!5}
|
||||
!llvm.module.flags = !{!3, !4, !5, !6}
|
||||
!nvvm.annotations = !{!7}
|
||||
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
|
||||
@ -77,4 +77,6 @@ declare void @__kmpc_kernel_init(i32, i16)
|
||||
!2 = !{}
|
||||
!3 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!4 = !{i32 1, !"wchar_size", i32 4}
|
||||
!5 = !{void ()* @kernel, !"kernel", i32 1}
|
||||
!5 = !{i32 7, !"openmp", i32 50}
|
||||
!6 = !{i32 7, !"openmp-device", i32 50}
|
||||
!7 = !{void ()* @kernel, !"kernel", i32 1}
|
||||
|
@ -70,3 +70,8 @@ declare void @__tgt_target_data_begin_mapper(%struct.ident_t*, i64, i32, i8**, i
|
||||
declare void @__tgt_target_data_end_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**)
|
||||
|
||||
declare dso_local i32 @rand(...)
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
|
||||
!0 = !{i32 7, !"openmp", i32 50}
|
||||
|
||||
|
@ -9,7 +9,7 @@ target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
|
||||
|
||||
define void @foo() {
|
||||
entry:
|
||||
%x = call i8* @__kmpc_alloc_shared(i64 4), !dbg !7
|
||||
%x = call i8* @__kmpc_alloc_shared(i64 4), !dbg !10
|
||||
%x_on_stack = bitcast i8* %x to i32*
|
||||
%0 = bitcast i32* %x_on_stack to i8*
|
||||
call void @use(i8* %0)
|
||||
@ -36,13 +36,17 @@ declare i8* @_Z10SafeMallocmPKc(i64 %size, i8* nocapture readnone %msg)
|
||||
declare void @__kmpc_free_shared(i8*)
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!3, !4}
|
||||
!llvm.module.flags = !{!3, !4, !5, !6}
|
||||
!nvvm.annotations = !{!7}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
|
||||
!1 = !DIFile(filename: "openmp_opt_module.c", directory: "/tmp/openmp_opt_module.c")
|
||||
!2 = !{}
|
||||
!3 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!4 = !{i32 1, !"wchar_size", i32 4}
|
||||
!5 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !6, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
|
||||
!6 = !DISubroutineType(types: !2)
|
||||
!7 = !DILocation(line: 5, column: 7, scope: !5)
|
||||
!5 = !{i32 7, !"openmp", i32 50}
|
||||
!6 = !{i32 7, !"openmp-device", i32 50}
|
||||
!7 = !{void ()* @foo, !"kernel", i32 1}
|
||||
!8 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !9, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
|
||||
!9 = !DISubroutineType(types: !2)
|
||||
!10 = !DILocation(line: 5, column: 7, scope: !8)
|
||||
|
Loading…
Reference in New Issue
Block a user