1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

[Attributor] Change AAExecutionDomain to only accept intrinsics

Summary:
The OpenMP runtime functions don't always provide unique thread IDs to
determine if a basic block is truly single-threaded. Change the implementation
to only check the NVPTX and AMDGPU thread ID intrinsics for now.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D102700
This commit is contained in:
Joseph Huber 2021-05-18 20:10:05 -04:00 committed by Huber, Joseph
parent 5f78ed293a
commit 36d5499565
2 changed files with 58 additions and 34 deletions

View File

@ -23,6 +23,9 @@
#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/ValueTracking.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/InitializePasses.h" #include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h" #include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO.h"
@ -2330,7 +2333,6 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
}; };
ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) { ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
Function *F = getAnchorScope(); Function *F = getAnchorScope();
ReversePostOrderTraversal<Function *> RPOT(F); ReversePostOrderTraversal<Function *> RPOT(F);
auto NumSingleThreadedBBs = SingleThreadedBBs.size(); auto NumSingleThreadedBBs = SingleThreadedBBs.size();
@ -2366,17 +2368,12 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
if (!C || !C->isZero()) if (!C || !C->isZero())
return false; return false;
if (auto *CB = dyn_cast<CallBase>(Cmp->getOperand(0))) { if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
RuntimeFunction ThreadNumRuntimeIDs[] = {OMPRTL_omp_get_thread_num, if (II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_tid_x)
OMPRTL___kmpc_master, return true;
OMPRTL___kmpc_global_thread_num}; if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
if (II->getIntrinsicID() == Intrinsic::amdgcn_workitem_id_x)
for (const auto ThreadNumRuntimeID : ThreadNumRuntimeIDs) {
auto &RFI = OMPInfoCache.RFIs[ThreadNumRuntimeID];
if (CB->getCalledFunction() == RFI.Declaration)
return true; return true;
}
}
return false; return false;
}; };

View File

@ -2,26 +2,20 @@
; REQUIRES: asserts ; REQUIRES: asserts
; ModuleID = 'single_threaded_exeuction.c' ; ModuleID = 'single_threaded_exeuction.c'
%struct.ident_t = type { i32, i32, i32, i32, i8* } define void @kernel() {
call void @__kmpc_kernel_init(i32 512, i16 1)
@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 call void @nvptx()
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 call void @amdgcn()
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
; CHECK: [openmp-opt] Basic block @bar entry is executed by a single thread.
; Function Attrs: noinline nounwind uwtable
define internal void @bar() {
entry:
ret void ret void
} }
; CHECK-NOT: [openmp-opt] Basic block @foo entry is executed by a single thread. ; CHECK-NOT: [openmp-opt] Basic block @nvptx entry is executed by a single thread.
; CHECK: [openmp-opt] Basic block @foo if.then is executed by a single thread. ; CHECK: [openmp-opt] Basic block @nvptx if.then is executed by a single thread.
; CHECK-NOT: [openmp-opt] Basic block @foo if.end is executed by a single thread. ; CHECK-NOT: [openmp-opt] Basic block @nvptx if.end is executed by a single thread.
; Function Attrs: noinline nounwind uwtable ; Function Attrs: noinline nounwind uwtable
define dso_local void @foo() { define dso_local void @nvptx() {
entry: entry:
%call = call i32 @omp_get_thread_num() %call = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
%cmp = icmp eq i32 %call, 0 %cmp = icmp eq i32 %call, 0
br i1 %cmp, label %if.then, label %if.end br i1 %cmp, label %if.then, label %if.end
@ -33,12 +27,45 @@ if.end:
ret void ret void
} }
declare dso_local i32 @omp_get_thread_num() ; CHECK-NOT: [openmp-opt] Basic block @amdgcn entry is executed by a single thread.
; CHECK: [openmp-opt] Basic block @amdgcn if.then is executed by a single thread.
; CHECK-NOT: [openmp-opt] Basic block @amdgcn if.end is executed by a single thread.
; Function Attrs: noinline nounwind uwtable
define dso_local void @amdgcn() {
entry:
%call = call i32 @llvm.amdgcn.workitem.id.x()
%cmp = icmp eq i32 %call, 0
br i1 %cmp, label %if.then, label %if.end
!llvm.module.flags = !{!0} if.then:
!llvm.ident = !{!1} call void @bar()
br label %if.end
!0 = !{i32 1, !"wchar_size", i32 4} if.end:
!1 = !{!"clang version 13.0.0"} ret void
!2 = !{!3} }
!3 = !{i64 2, i64 -1, i64 -1, i1 true}
; CHECK: [openmp-opt] Basic block @bar entry is executed by a single thread.
; Function Attrs: noinline nounwind uwtable
; Helper with an empty body (it only returns). In the visible IR it is called
; from the if.then block of @amdgcn, which is reached only when the
; workitem-id intrinsic result compares equal to 0.
define internal void @bar() {
entry:
ret void
}
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
declare i32 @llvm.amdgcn.workitem.id.x()
declare void @__kmpc_kernel_init(i32, i16)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4}
!nvvm.annotations = !{!5}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
!1 = !DIFile(filename: "single_threaded_execution.c", directory: "/tmp/single_threaded_execution.c")
!2 = !{}
!3 = !{i32 2, !"Debug Info Version", i32 3}
!4 = !{i32 1, !"wchar_size", i32 4}
!5 = !{void ()* @kernel, !"kernel", i32 1}