From 36d549956590541587e8517ba690004062fb4a97 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 18 May 2021 20:10:05 -0400 Subject: [PATCH] [Attributor] Change AAExecutionDomain to only accept intrinsics Summary: The OpenMP runtime functions don't always provide unique thread IDs, so they cannot be used to determine whether a basic block is truly single-threaded. Change the implementation to only check the NVPTX and AMDGPU thread-ID intrinsics for now. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D102700 --- lib/Transforms/IPO/OpenMPOpt.cpp | 21 +++--- .../OpenMP/single_threaded_execution.ll | 71 +++++++++++++------ 2 files changed, 58 insertions(+), 34 deletions(-) diff --git a/lib/Transforms/IPO/OpenMPOpt.cpp b/lib/Transforms/IPO/OpenMPOpt.cpp index a05c7539ae4..74b025e6f12 100644 --- a/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/lib/Transforms/IPO/OpenMPOpt.cpp @@ -23,6 +23,9 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" +#include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/IPO.h" @@ -2330,7 +2333,6 @@ struct AAExecutionDomainFunction : public AAExecutionDomain { }; ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) { - auto &OMPInfoCache = static_cast(A.getInfoCache()); Function *F = getAnchorScope(); ReversePostOrderTraversal RPOT(F); auto NumSingleThreadedBBs = SingleThreadedBBs.size(); @@ -2366,17 +2368,12 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) { if (!C || !C->isZero()) return false; - if (auto *CB = dyn_cast(Cmp->getOperand(0))) { - RuntimeFunction ThreadNumRuntimeIDs[] = {OMPRTL_omp_get_thread_num, - OMPRTL___kmpc_master, - OMPRTL___kmpc_global_thread_num}; - - for (const auto ThreadNumRuntimeID : ThreadNumRuntimeIDs) { - auto &RFI = OMPInfoCache.RFIs[ThreadNumRuntimeID]; - if
(CB->getCalledFunction() == RFI.Declaration) - return true; - } - } + if (auto *II = dyn_cast(Cmp->getOperand(0))) + if (II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_tid_x) + return true; + if (auto *II = dyn_cast(Cmp->getOperand(0))) + if (II->getIntrinsicID() == Intrinsic::amdgcn_workitem_id_x) + return true; return false; }; diff --git a/test/Transforms/OpenMP/single_threaded_execution.ll b/test/Transforms/OpenMP/single_threaded_execution.ll index 00b73823fe9..3dbfc9eb8b5 100644 --- a/test/Transforms/OpenMP/single_threaded_execution.ll +++ b/test/Transforms/OpenMP/single_threaded_execution.ll @@ -2,26 +2,20 @@ ; REQUIRES: asserts ; ModuleID = 'single_threaded_exeuction.c' -%struct.ident_t = type { i32, i32, i32, i32, i8* } - -@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 -@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 -@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 - -; CHECK: [openmp-opt] Basic block @bar entry is executed by a single thread. -; Function Attrs: noinline nounwind uwtable -define internal void @bar() { -entry: +define void @kernel() { + call void @__kmpc_kernel_init(i32 512, i16 1) + call void @nvptx() + call void @amdgcn() ret void } -; CHECK-NOT: [openmp-opt] Basic block @foo entry is executed by a single thread. -; CHECK: [openmp-opt] Basic block @foo if.then is executed by a single thread. -; CHECK-NOT: [openmp-opt] Basic block @foo if.end is executed by a single thread. +; CHECK-NOT: [openmp-opt] Basic block @nvptx entry is executed by a single thread. +; CHECK: [openmp-opt] Basic block @nvptx if.then is executed by a single thread. +; CHECK-NOT: [openmp-opt] Basic block @nvptx if.end is executed by a single thread. 
; Function Attrs: noinline nounwind uwtable -define dso_local void @foo() { +define dso_local void @nvptx() { entry: - %call = call i32 @omp_get_thread_num() + %call = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() %cmp = icmp eq i32 %call, 0 br i1 %cmp, label %if.then, label %if.end @@ -33,12 +27,45 @@ if.end: ret void } -declare dso_local i32 @omp_get_thread_num() +; CHECK-NOT: [openmp-opt] Basic block @amdgcn entry is executed by a single thread. +; CHECK: [openmp-opt] Basic block @amdgcn if.then is executed by a single thread. +; CHECK-NOT: [openmp-opt] Basic block @amdgcn if.end is executed by a single thread. +; Function Attrs: noinline nounwind uwtable +define dso_local void @amdgcn() { +entry: + %call = call i32 @llvm.amdgcn.workitem.id.x() + %cmp = icmp eq i32 %call, 0 + br i1 %cmp, label %if.then, label %if.end -!llvm.module.flags = !{!0} -!llvm.ident = !{!1} +if.then: + call void @bar() + br label %if.end -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{!"clang version 13.0.0"} -!2 = !{!3} -!3 = !{i64 2, i64 -1, i64 -1, i1 true} +if.end: + ret void +} + +; CHECK: [openmp-opt] Basic block @bar entry is executed by a single thread. +; Function Attrs: noinline nounwind uwtable +define internal void @bar() { +entry: + ret void +} + +declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() + +declare i32 @llvm.amdgcn.workitem.id.x() + +declare void @__kmpc_kernel_init(i32, i16) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} +!nvvm.annotations = !{!5} + + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "single_threaded_execution.c", directory: "/tmp/single_threaded_execution.c") +!2 = !{} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{void ()* @kernel, !"kernel", i32 1}