From 64f169d63d13696e344af114be1d4ab142505181 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Mon, 10 Feb 2020 14:28:47 -0600 Subject: [PATCH] [OpenMP][IRBuilder] Perform finalization (incl. outlining) late In order to fix PR44560 and to prepare for loop transformations we now finalize a function late, which will also do the outlining late. The logic is as before but the actual outlining step happens now after the function was fully constructed. Once we have loop transformations we can apply them in the finalize step before the outlining. Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D74372 --- include/llvm/Frontend/OpenMP/OMPIRBuilder.h | 17 ++ lib/Frontend/OpenMP/OMPIRBuilder.cpp | 229 +++++++++++--------- lib/Transforms/Utils/CodeExtractor.cpp | 6 +- unittests/Frontend/OpenMPIRBuilderTest.cpp | 20 +- 4 files changed, 162 insertions(+), 110 deletions(-) diff --git a/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 07ec82df14c..a1470bc0495 100644 --- a/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -34,6 +34,9 @@ public: /// before any other method and only once! void initialize(); + /// Finalize the underlying module, e.g., by outlining regions. + void finalize(); + /// Add attributes known for \p FnID to \p Fn. void addAttributes(omp::RuntimeFunction FnID, Function &Fn); @@ -254,6 +257,20 @@ private: /// Map to remember existing ident_t*. DenseMap, GlobalVariable *> IdentMap; + + /// Helper that contains information about regions we need to outline + /// during finalization. + struct OutlineInfo { + SmallVector Blocks; + using PostOutlineCBTy = std::function; + PostOutlineCBTy PostOutlineCB; + }; + + /// Collection of regions that need to be outlined during finalization. + SmallVector OutlineInfos; + + /// Add a new region that will be outlined later. + void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); } }; } // end namespace llvm diff --git a/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/lib/Frontend/OpenMP/OMPIRBuilder.cpp index d45d1d02447..24184595890 100644 --- a/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -93,6 +93,55 @@ Function *OpenMPIRBuilder::getOrCreateRuntimeFunction(RuntimeFunction FnID) { void OpenMPIRBuilder::initialize() { initializeTypes(M); } +void OpenMPIRBuilder::finalize() { + for (OutlineInfo &OI : OutlineInfos) { + assert(!OI.Blocks.empty() && + "Outlined regions should have at least a single block!"); + BasicBlock *RegEntryBB = OI.Blocks.front(); + Function *OuterFn = RegEntryBB->getParent(); + CodeExtractorAnalysisCache CEAC(*OuterFn); + CodeExtractor Extractor(OI.Blocks, /* DominatorTree */ nullptr, + /* AggregateArgs */ false, + /* BlockFrequencyInfo */ nullptr, + /* BranchProbabilityInfo */ nullptr, + /* AssumptionCache */ nullptr, + /* AllowVarArgs */ true, + /* AllowAlloca */ true, + /* Suffix */ ".omp_par"); + + LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n"); + + Function *OutlinedFn = Extractor.extractCodeRegion(CEAC); + + LLVM_DEBUG(dbgs() << "After outlining: " << *OuterFn << "\n"); + LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n"); + + // For compability with the clang CG we move the outlined function after the + // one with the parallel region. + OutlinedFn->removeFromParent(); + M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn); + + // Remove the artificial entry introduced by the extractor right away, we + // made our own entry block after all. + { + BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock(); + assert(ArtificialEntry.getUniqueSuccessor() == RegEntryBB); + assert(RegEntryBB->getUniquePredecessor() == &ArtificialEntry); + RegEntryBB->moveBefore(&ArtificialEntry); + ArtificialEntry.eraseFromParent(); + } + assert(&OutlinedFn->getEntryBlock() == RegEntryBB); + assert(OutlinedFn && OutlinedFn->getNumUses() == 1); + + // Run a user callback, e.g. to add attributes. + if (OI.PostOutlineCB) + OI.PostOutlineCB(*OutlinedFn); + } + + // Allow finalize to be called multiple times. + OutlineInfos.clear(); +} + Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr, IdentFlag LocFlags) { // Enable "C-mode". @@ -415,17 +464,18 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel( // PRegionExitBB <- A common exit to simplify block collection. // - LLVM_DEBUG(dbgs() << "Before body codegen: " << *UI->getFunction() << "\n"); + LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n"); // Let the caller create the body. assert(BodyGenCB && "Expected body generation callback!"); InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin()); BodyGenCB(AllocaIP, CodeGenIP, *PRegPreFiniBB); - LLVM_DEBUG(dbgs() << "After body codegen: " << *UI->getFunction() << "\n"); + LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n"); + OutlineInfo OI; SmallPtrSet ParallelRegionBlockSet; - SmallVector ParallelRegionBlocks, Worklist; + SmallVector Worklist; ParallelRegionBlockSet.insert(PRegEntryBB); ParallelRegionBlockSet.insert(PRegExitBB); @@ -433,14 +483,14 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel( Worklist.push_back(PRegEntryBB); while (!Worklist.empty()) { BasicBlock *BB = Worklist.pop_back_val(); - ParallelRegionBlocks.push_back(BB); + OI.Blocks.push_back(BB); for (BasicBlock *SuccBB : successors(BB)) if (ParallelRegionBlockSet.insert(SuccBB).second) Worklist.push_back(SuccBB); } CodeExtractorAnalysisCache CEAC(*OuterFn); - CodeExtractor Extractor(ParallelRegionBlocks, /* DominatorTree */ nullptr, + CodeExtractor Extractor(OI.Blocks, /* DominatorTree */ nullptr, /* AggregateArgs */ false, /* BlockFrequencyInfo */ nullptr, /* BranchProbabilityInfo */ nullptr, @@ -455,7 +505,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel( Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit); Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands); - LLVM_DEBUG(dbgs() << "Before privatization: " << *UI->getFunction() << "\n"); + LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n"); FunctionCallee TIDRTLFn = getOrCreateRuntimeFunction(OMPRTL___kmpc_global_thread_num); @@ -496,56 +546,12 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel( PrivHelper(*Output); } - LLVM_DEBUG(dbgs() << "After privatization: " << *UI->getFunction() << "\n"); + LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n"); LLVM_DEBUG({ - for (auto *BB : ParallelRegionBlocks) + for (auto *BB : OI.Blocks) dbgs() << " PBR: " << BB->getName() << "\n"; }); - // Add some known attributes to the outlined function. - Function *OutlinedFn = Extractor.extractCodeRegion(CEAC); - OutlinedFn->addParamAttr(0, Attribute::NoAlias); - OutlinedFn->addParamAttr(1, Attribute::NoAlias); - OutlinedFn->addFnAttr(Attribute::NoUnwind); - OutlinedFn->addFnAttr(Attribute::NoRecurse); - - LLVM_DEBUG(dbgs() << "After outlining: " << *UI->getFunction() << "\n"); - LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n"); - - // For compability with the clang CG we move the outlined function after the - // one with the parallel region. - OutlinedFn->removeFromParent(); - M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn); - - // Remove the artificial entry introduced by the extractor right away, we - // made our own entry block after all. - { - BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock(); - assert(ArtificialEntry.getUniqueSuccessor() == PRegEntryBB); - assert(PRegEntryBB->getUniquePredecessor() == &ArtificialEntry); - PRegEntryBB->moveBefore(&ArtificialEntry); - ArtificialEntry.eraseFromParent(); - } - LLVM_DEBUG(dbgs() << "PP Outlined function: " << *OutlinedFn << "\n"); - assert(&OutlinedFn->getEntryBlock() == PRegEntryBB); - - assert(OutlinedFn && OutlinedFn->getNumUses() == 1); - assert(OutlinedFn->arg_size() >= 2 && - "Expected at least tid and bounded tid as arguments"); - unsigned NumCapturedVars = OutlinedFn->arg_size() - /* tid & bounded tid */ 2; - - CallInst *CI = cast(OutlinedFn->user_back()); - CI->getParent()->setName("omp_parallel"); - Builder.SetInsertPoint(CI); - - // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn); - Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars), - Builder.CreateBitCast(OutlinedFn, ParallelTaskPtr)}; - - SmallVector RealArgs; - RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs)); - RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end()); - FunctionCallee RTLFn = getOrCreateRuntimeFunction(OMPRTL___kmpc_fork_call); if (auto *F = dyn_cast(RTLFn.getCallee())) { if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { @@ -558,59 +564,86 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel( // callback callee. F->addMetadata( llvm::LLVMContext::MD_callback, - *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( - 2, {-1, -1}, - /* VarArgsArePassed */ true)})); + *llvm::MDNode::get( + Ctx, {MDB.createCallbackEncoding(2, {-1, -1}, + /* VarArgsArePassed */ true)})); } } - Builder.CreateCall(RTLFn, RealArgs); + OI.PostOutlineCB = [=](Function &OutlinedFn) { + // Add some known attributes. + OutlinedFn.addParamAttr(0, Attribute::NoAlias); + OutlinedFn.addParamAttr(1, Attribute::NoAlias); + OutlinedFn.addFnAttr(Attribute::NoUnwind); + OutlinedFn.addFnAttr(Attribute::NoRecurse); - LLVM_DEBUG(dbgs() << "With fork_call placed: " - << *Builder.GetInsertBlock()->getParent() << "\n"); + assert(OutlinedFn.arg_size() >= 2 && + "Expected at least tid and bounded tid as arguments"); + unsigned NumCapturedVars = + OutlinedFn.arg_size() - /* tid & bounded tid */ 2; - InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end()); - InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end()); - UI->eraseFromParent(); + CallInst *CI = cast(OutlinedFn.user_back()); + CI->getParent()->setName("omp_parallel"); + Builder.SetInsertPoint(CI); - // Initialize the local TID stack location with the argument value. - Builder.SetInsertPoint(PrivTID); - Function::arg_iterator OutlinedAI = OutlinedFn->arg_begin(); - Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr); + // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn); + Value *ForkCallArgs[] = { + Ident, Builder.getInt32(NumCapturedVars), + Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)}; - // If no "if" clause was present we do not need the call created during - // outlining, otherwise we reuse it in the serialized parallel region. - if (!ElseTI) { - CI->eraseFromParent(); - } else { + SmallVector RealArgs; + RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs)); + RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end()); - // If an "if" clause was present we are now generating the serialized - // version into the "else" branch. - Builder.SetInsertPoint(ElseTI); + Builder.CreateCall(RTLFn, RealArgs); - // Build calls __kmpc_serialized_parallel(&Ident, GTid); - Value *SerializedParallelCallArgs[] = {Ident, ThreadID}; - Builder.CreateCall( - getOrCreateRuntimeFunction(OMPRTL___kmpc_serialized_parallel), - SerializedParallelCallArgs); - - // OutlinedFn(>id, &zero, CapturedStruct); - CI->removeFromParent(); - Builder.Insert(CI); - - // __kmpc_end_serialized_parallel(&Ident, GTid); - Value *EndArgs[] = {Ident, ThreadID}; - Builder.CreateCall( - getOrCreateRuntimeFunction(OMPRTL___kmpc_end_serialized_parallel), - EndArgs); - - LLVM_DEBUG(dbgs() << "With serialized parallel region: " + LLVM_DEBUG(dbgs() << "With fork_call placed: " << *Builder.GetInsertBlock()->getParent() << "\n"); - } + + InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end()); + + // Initialize the local TID stack location with the argument value. + Builder.SetInsertPoint(PrivTID); + Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin(); + Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr); + + // If no "if" clause was present we do not need the call created during + // outlining, otherwise we reuse it in the serialized parallel region. + if (!ElseTI) { + CI->eraseFromParent(); + } else { + + // If an "if" clause was present we are now generating the serialized + // version into the "else" branch. + Builder.SetInsertPoint(ElseTI); + + // Build calls __kmpc_serialized_parallel(&Ident, GTid); + Value *SerializedParallelCallArgs[] = {Ident, ThreadID}; + Builder.CreateCall( + getOrCreateRuntimeFunction(OMPRTL___kmpc_serialized_parallel), + SerializedParallelCallArgs); + + // OutlinedFn(>id, &zero, CapturedStruct); + CI->removeFromParent(); + Builder.Insert(CI); + + // __kmpc_end_serialized_parallel(&Ident, GTid); + Value *EndArgs[] = {Ident, ThreadID}; + Builder.CreateCall( + getOrCreateRuntimeFunction(OMPRTL___kmpc_end_serialized_parallel), + EndArgs); + + LLVM_DEBUG(dbgs() << "With serialized parallel region: " + << *Builder.GetInsertBlock()->getParent() << "\n"); + } + + for (Instruction *I : ToBeDeleted) + I->eraseFromParent(); + }; // Adjust the finalization stack, verify the adjustment, and call the - // finalize function a last time to finalize values between the pre-fini block - // and the exit block if we left the parallel "the normal way". + // finalize function a last time to finalize values between the pre-fini + // block and the exit block if we left the parallel "the normal way". auto FiniInfo = FinalizationStack.pop_back_val(); (void)FiniInfo; assert(FiniInfo.DK == OMPD_parallel && @@ -618,15 +651,17 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel( Instruction *PreFiniTI = PRegPreFiniBB->getTerminator(); assert(PreFiniTI->getNumSuccessors() == 1 && - PreFiniTI->getSuccessor(0)->size() == 1 && - isa(PreFiniTI->getSuccessor(0)->getTerminator()) && + PreFiniTI->getSuccessor(0) == PRegExitBB && "Unexpected CFG structure!"); InsertPointTy PreFiniIP(PRegPreFiniBB, PreFiniTI->getIterator()); FiniCB(PreFiniIP); - for (Instruction *I : ToBeDeleted) - I->eraseFromParent(); + InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end()); + UI->eraseFromParent(); + + // Register the outlined info. + addOutlineInfo(std::move(OI)); return AfterIP; } diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index 210cd7d2647..e72e17f495a 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -1405,11 +1405,7 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc, DISubprogram *OldSP = OldFunc.getSubprogram(); LLVMContext &Ctx = OldFunc.getContext(); - // See llvm.org/PR44560, OpenMP passes an invalid subprogram to CodeExtractor. - bool NeedWorkaroundForOpenMPIRBuilderBug = - OldSP && OldSP->getRetainedNodes()->isTemporary(); - - if (!OldSP || NeedWorkaroundForOpenMPIRBuilderBug) { + if (!OldSP) { // Erase any debug info the new function contains. stripDebugInfo(NewFunc); // Make sure the old function doesn't contain any non-local metadata refs. diff --git a/unittests/Frontend/OpenMPIRBuilderTest.cpp b/unittests/Frontend/OpenMPIRBuilderTest.cpp index c6a51f6b1af..4bbd9667c4c 100644 --- a/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -96,7 +96,7 @@ TEST_F(OpenMPIRBuilderTest, CreateBarrier) { EXPECT_EQ(cast(Barrier)->getArgOperand(1), GTID); Builder.CreateUnreachable(); - EXPECT_FALSE(verifyModule(*M)); + EXPECT_FALSE(verifyModule(*M, &errs())); } TEST_F(OpenMPIRBuilderTest, CreateCancel) { @@ -151,7 +151,7 @@ TEST_F(OpenMPIRBuilderTest, CreateCancel) { OMPBuilder.popFinalizationCB(); Builder.CreateUnreachable(); - EXPECT_FALSE(verifyModule(*M)); + EXPECT_FALSE(verifyModule(*M, &errs())); } TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) { @@ -212,7 +212,7 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) { OMPBuilder.popFinalizationCB(); Builder.CreateUnreachable(); - EXPECT_FALSE(verifyModule(*M)); + EXPECT_FALSE(verifyModule(*M, &errs())); } TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) { @@ -267,7 +267,7 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) { OMPBuilder.popFinalizationCB(); Builder.CreateUnreachable(); - EXPECT_FALSE(verifyModule(*M)); + EXPECT_FALSE(verifyModule(*M, &errs())); } TEST_F(OpenMPIRBuilderTest, DbgLoc) { @@ -362,9 +362,9 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) { auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; - IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel( - Loc, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false); - + IRBuilder<>::InsertPoint AfterIP = + OMPBuilder.CreateParallel(Loc, BodyGenCB, PrivCB, FiniCB, nullptr, + nullptr, OMP_PROC_BIND_default, false); EXPECT_EQ(NumBodiesGenerated, 1U); EXPECT_EQ(NumPrivatizedVars, 1U); EXPECT_EQ(NumFinalizationPoints, 1U); @@ -372,10 +372,12 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) { Builder.restoreIP(AfterIP); Builder.CreateRetVoid(); + OMPBuilder.finalize(); + EXPECT_NE(PrivAI, nullptr); Function *OutlinedFn = PrivAI->getFunction(); EXPECT_NE(F, OutlinedFn); - EXPECT_FALSE(verifyModule(*M)); + EXPECT_FALSE(verifyModule(*M, &errs())); EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoUnwind)); EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoRecurse)); EXPECT_TRUE(OutlinedFn->hasParamAttribute(0, Attribute::NoAlias)); @@ -470,6 +472,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelIfCond) { Builder.restoreIP(AfterIP); Builder.CreateRetVoid(); + OMPBuilder.finalize(); EXPECT_NE(PrivAI, nullptr); Function *OutlinedFn = PrivAI->getFunction(); @@ -595,6 +598,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) { Builder.restoreIP(AfterIP); Builder.CreateRetVoid(); + OMPBuilder.finalize(); EXPECT_FALSE(verifyModule(*M, &errs()));