mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 12:12:47 +01:00
[OpenMP] Add new execution mode for SPMD execution with Generic semantics
Qualified kernels can be transformed from generic mode to SPMD mode by an optimization in OpenMPOpt. This patch introduces a new execution mode to indicate kernels that have been transformed from generic mode to SPMD mode. These kernels execute in SPMD mode, but still need generic-mode semantics for scheduling the blocks and threads. Without this, far too few blocks will be scheduled for a generic region, because SPMD mode expects the trip count to be divided by the number of threads. Reviewed By: ggeorgakoudis. Differential Revision: https://reviews.llvm.org/D106460
This commit is contained in:
parent
472a223072
commit
2c3ddf5d6f
@ -2886,8 +2886,12 @@ struct AAKernelInfoFunction : AAKernelInfo {
|
||||
assert(ExecMode->getInitializer() &&
|
||||
ExecMode->getInitializer()->isOneValue() &&
|
||||
"Initially non-SPMD kernel has SPMD exec mode!");
|
||||
ExecMode->setInitializer(
|
||||
ConstantInt::get(ExecMode->getInitializer()->getType(), 0));
|
||||
|
||||
// Set the global exec mode flag to indicate SPMD-Generic mode.
|
||||
constexpr int SPMDGeneric = 2;
|
||||
if (!ExecMode->getInitializer()->isZeroValue())
|
||||
ExecMode->setInitializer(
|
||||
ConstantInt::get(ExecMode->getInitializer()->getType(), SPMDGeneric));
|
||||
|
||||
// Next rewrite the init and deinit calls to indicate we use SPMD-mode now.
|
||||
const int InitIsSPMDArgNo = 1;
|
||||
|
@ -13,7 +13,7 @@ target triple = "nvptx64"
|
||||
|
||||
;.
|
||||
; CHECK: @[[IS_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
|
||||
; CHECK: @[[WILL_BE_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
|
||||
; CHECK: @[[WILL_BE_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2
|
||||
; CHECK: @[[NON_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
|
||||
; CHECK: @[[WILL_NOT_BE_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
|
||||
; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i8
|
||||
|
@ -32,7 +32,7 @@ target triple = "nvptx64"
|
||||
;.
|
||||
; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
|
||||
; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
|
||||
; CHECK: @[[__OMP_OFFLOADING_2C_38C77_SEQUENTIAL_LOOP_L4_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
|
||||
; CHECK: @[[__OMP_OFFLOADING_2C_38C77_SEQUENTIAL_LOOP_L4_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2
|
||||
; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x i8*] [i8* @__omp_offloading_2c_38c77_sequential_loop_l4_exec_mode], section "llvm.metadata"
|
||||
;.
|
||||
define weak void @__omp_offloading_2c_38c77_sequential_loop_l4() #0 {
|
||||
|
Loading…
Reference in New Issue
Block a user