mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
[OpenMP] Set RequiresFullRuntime false in SPMDization
SPMDization in D102307 does not change the RequiresFullRuntime argument of kmpc_target_init/deinit calls. However, the constraints of SPMDization detection for converting a target region to SPMD mode should guarantee that the region does not require full runtime support. Hence, this patch sets RequiresFullRuntime to false for improved execution performance. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D105556
This commit is contained in:
parent
dcddd93e08
commit
43d4e670a4
@ -493,7 +493,8 @@ struct KernelInfoState : AbstractState {
|
||||
BooleanStateWithPtrSetVector<CallBase> ReachedUnknownParallelRegions;
|
||||
|
||||
/// State to track if we are in SPMD-mode, assumed or known, and why we decided
|
||||
/// we cannot be.
|
||||
/// we cannot be. If it is assumed, then RequiresFullRuntime should also be
|
||||
/// false.
|
||||
BooleanStateWithPtrSetVector<Instruction> SPMDCompatibilityTracker;
|
||||
|
||||
/// The __kmpc_target_init call in this kernel, if any. If we find more than
|
||||
@ -2773,9 +2774,32 @@ struct AAKernelInfoFunction : AAKernelInfo {
|
||||
return Val;
|
||||
};
|
||||
|
||||
Attributor::SimplifictionCallbackTy IsGenericModeSimplifyCB =
|
||||
[&](const IRPosition &IRP, const AbstractAttribute *AA,
|
||||
bool &UsedAssumedInformation) -> Optional<Value *> {
|
||||
// IRP represents the "RequiresFullRuntime" argument of an
|
||||
// __kmpc_target_init or __kmpc_target_deinit call. We will answer this
|
||||
// one with the internal state of the SPMDCompatibilityTracker, so if
|
||||
// generic then true, if SPMD then false.
|
||||
if (!SPMDCompatibilityTracker.isValidState())
|
||||
return nullptr;
|
||||
if (!SPMDCompatibilityTracker.isAtFixpoint()) {
|
||||
if (AA)
|
||||
A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
|
||||
UsedAssumedInformation = true;
|
||||
} else {
|
||||
UsedAssumedInformation = false;
|
||||
}
|
||||
auto *Val = ConstantInt::getBool(IRP.getAnchorValue().getContext(),
|
||||
!SPMDCompatibilityTracker.isAssumed());
|
||||
return Val;
|
||||
};
|
||||
|
||||
constexpr const int InitIsSPMDArgNo = 1;
|
||||
constexpr const int DeinitIsSPMDArgNo = 1;
|
||||
constexpr const int InitUseStateMachineArgNo = 2;
|
||||
constexpr const int InitRequiresFullRuntimeArgNo = 3;
|
||||
constexpr const int DeinitRequiresFullRuntimeArgNo = 2;
|
||||
A.registerSimplificationCallback(
|
||||
IRPosition::callsite_argument(*KernelInitCB, InitUseStateMachineArgNo),
|
||||
StateMachineSimplifyCB);
|
||||
@ -2785,6 +2809,14 @@ struct AAKernelInfoFunction : AAKernelInfo {
|
||||
A.registerSimplificationCallback(
|
||||
IRPosition::callsite_argument(*KernelDeinitCB, DeinitIsSPMDArgNo),
|
||||
IsSPMDModeSimplifyCB);
|
||||
A.registerSimplificationCallback(
|
||||
IRPosition::callsite_argument(*KernelInitCB,
|
||||
InitRequiresFullRuntimeArgNo),
|
||||
IsGenericModeSimplifyCB);
|
||||
A.registerSimplificationCallback(
|
||||
IRPosition::callsite_argument(*KernelDeinitCB,
|
||||
DeinitRequiresFullRuntimeArgNo),
|
||||
IsGenericModeSimplifyCB);
|
||||
|
||||
// Check if we know we are in SPMD-mode already.
|
||||
ConstantInt *IsSPMDArg =
|
||||
@ -2861,6 +2893,8 @@ struct AAKernelInfoFunction : AAKernelInfo {
|
||||
const int InitIsSPMDArgNo = 1;
|
||||
const int DeinitIsSPMDArgNo = 1;
|
||||
const int InitUseStateMachineArgNo = 2;
|
||||
const int InitRequiresFullRuntimeArgNo = 3;
|
||||
const int DeinitRequiresFullRuntimeArgNo = 2;
|
||||
|
||||
auto &Ctx = getAnchorValue().getContext();
|
||||
A.changeUseAfterManifest(KernelInitCB->getArgOperandUse(InitIsSPMDArgNo),
|
||||
@ -2871,6 +2905,13 @@ struct AAKernelInfoFunction : AAKernelInfo {
|
||||
A.changeUseAfterManifest(
|
||||
KernelDeinitCB->getArgOperandUse(DeinitIsSPMDArgNo),
|
||||
*ConstantInt::getBool(Ctx, 1));
|
||||
A.changeUseAfterManifest(
|
||||
KernelInitCB->getArgOperandUse(InitRequiresFullRuntimeArgNo),
|
||||
*ConstantInt::getBool(Ctx, 0));
|
||||
A.changeUseAfterManifest(
|
||||
KernelDeinitCB->getArgOperandUse(DeinitRequiresFullRuntimeArgNo),
|
||||
*ConstantInt::getBool(Ctx, 0));
|
||||
|
||||
++NumOpenMPTargetRegionKernelsSPMD;
|
||||
|
||||
auto Remark = [&](OptimizationRemark OR) {
|
||||
|
@ -131,7 +131,8 @@ target triple = "nvptx64"
|
||||
@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
|
||||
@llvm.compiler.used = appending global [8 x i8*] [i8* @__omp_offloading_2c_389eb_no_state_machine_needed_l14_exec_mode, i8* @__omp_offloading_2c_389eb_simple_state_machine_l19_exec_mode, i8* @__omp_offloading_2c_389eb_simple_state_machine_interprocedural_l35_exec_mode, i8* @__omp_offloading_2c_389eb_simple_state_machine_with_fallback_l50_exec_mode, i8* @__omp_offloading_2c_389eb_simple_state_machine_no_openmp_attr_l61_exec_mode, i8* @__omp_offloading_2c_389eb_simple_state_machine_pure_l72_exec_mode, i8* @__omp_offloading_2c_389eb_simple_state_machine_interprocedural_nested_recursive_l86_exec_mode, i8* @__omp_offloading_2c_389eb_no_state_machine_weak_callee_l106_exec_mode], section "llvm.metadata"
|
||||
|
||||
; The second to last argument of __kmpc_target_init is is set to false to indicate we do not need the generic runtime state machine.
|
||||
; The second to last argument of __kmpc_target_init is set to false to indicate we do not need the generic runtime state machine.
|
||||
; The last argument is also set to false due to SPMDization.
|
||||
; No user code state machine is built because we do not need one.
|
||||
define weak void @__omp_offloading_2c_389eb_no_state_machine_needed_l14() #0 {
|
||||
entry:
|
||||
@ -1525,14 +1526,14 @@ attributes #10 = { convergent nounwind readonly willreturn }
|
||||
; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noalias noundef nonnull readnone align 8 dereferenceable(24) @[[GLOB1]], i1 noundef true, i1 noundef false, i1 noundef true)
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noalias noundef nonnull readnone align 8 dereferenceable(24) @[[GLOB1]], i1 noundef true, i1 noundef false, i1 noundef false)
|
||||
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
|
||||
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
|
||||
; CHECK: user_code.entry:
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]]
|
||||
; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
|
||||
; CHECK-NEXT: call void @__omp_outlined__12(i32* noundef nonnull align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noundef nonnull align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]]
|
||||
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true)
|
||||
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false)
|
||||
; CHECK-NEXT: ret void
|
||||
; CHECK: worker.exit:
|
||||
; CHECK-NEXT: ret void
|
||||
|
@ -27,6 +27,7 @@ target triple = "nvptx64"
|
||||
@llvm.compiler.used = appending global [1 x i8*] [i8* @__omp_offloading_2c_38c77_sequential_loop_l4_exec_mode], section "llvm.metadata"
|
||||
|
||||
; The second argument of __kmpc_target_init and deinit is set to true to indicate that we can run in SPMD mode.
|
||||
; The last argument is set to false since full runtime support is not needed in SPMDization.
|
||||
; We also adjusted the global __omp_offloading_2c_38c77_sequential_loop_l4_exec_mode to have a zero initializer (which indicates SPMD mode to the runtime).
|
||||
;.
|
||||
; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
|
||||
@ -41,14 +42,14 @@ define weak void @__omp_offloading_2c_38c77_sequential_loop_l4() #0 {
|
||||
; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 true)
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false)
|
||||
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
|
||||
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
|
||||
; CHECK: user_code.entry:
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2:[0-9]+]]
|
||||
; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
|
||||
; CHECK-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]]
|
||||
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true)
|
||||
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false)
|
||||
; CHECK-NEXT: ret void
|
||||
; CHECK: worker.exit:
|
||||
; CHECK-NEXT: ret void
|
||||
|
Loading…
Reference in New Issue
Block a user