mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
[OpenMPIRBuilder] Implement collapseLoops.
The collapseLoops method implements a transformations facilitating the implementation of the collapse-clause. It takes a list of loops from a loop nest and reduces it to a single loop that can be used by other methods that are implemented on just a single loop, such as createStaticWorkshareLoop. This patch shares some changes with D92974 (such as adding some getters to CanonicalLoopNest), used by both patches. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D93268
This commit is contained in:
parent
0adbe76ec7
commit
930857b772
@ -274,6 +274,70 @@ public:
|
||||
InsertPointTy ComputeIP = {},
|
||||
const Twine &Name = "loop");
|
||||
|
||||
/// Collapse a loop nest into a single loop.
|
||||
///
|
||||
/// Merges loops of a loop nest into a single CanonicalLoopNest representation
|
||||
/// that has the same number of innermost loop iterations as the origin loop
|
||||
/// nest. The induction variables of the input loops are derived from the
|
||||
/// collapsed loop's induction variable. This is intended to be used to
|
||||
/// implement OpenMP's collapse clause. Before applying a directive,
|
||||
/// collapseLoops normalizes a loop nest to contain only a single loop and the
|
||||
/// directive's implementation does not need to handle multiple loops itself.
|
||||
/// This does not remove the need to handle all loop nest handling by
|
||||
/// directives, such as the ordered(<n>) clause or the simd schedule-clause
|
||||
/// modifier of the worksharing-loop directive.
|
||||
///
|
||||
/// Example:
|
||||
/// \code
|
||||
/// for (int i = 0; i < 7; ++i) // Canonical loop "i"
|
||||
/// for (int j = 0; j < 9; ++j) // Canonical loop "j"
|
||||
/// body(i, j);
|
||||
/// \endcode
|
||||
///
|
||||
/// After collapsing with Loops={i,j}, the loop is changed to
|
||||
/// \code
|
||||
/// for (int ij = 0; ij < 63; ++ij) {
|
||||
/// int i = ij / 9;
|
||||
/// int j = ij % 9;
|
||||
/// body(i, j);
|
||||
/// }
|
||||
/// \endcode
|
||||
///
|
||||
/// In the current implementation, the following limitations apply:
|
||||
///
|
||||
/// * All input loops have an induction variable of the same type.
|
||||
///
|
||||
/// * The collapsed loop will have the same trip count integer type as the
|
||||
/// input loops. Therefore it is possible that the collapsed loop cannot
|
||||
/// represent all iterations of the input loops. For instance, assuming a
|
||||
/// 32 bit integer type, and two input loops both iterating 2^16 times, the
|
||||
/// theoretical trip count of the collapsed loop would be 2^32 iteration,
|
||||
/// which cannot be represented in an 32-bit integer. Behavior is undefined
|
||||
/// in this case.
|
||||
///
|
||||
/// * The trip counts of every input loop must be available at \p ComputeIP.
|
||||
/// Non-rectangular loops are not yet supported.
|
||||
///
|
||||
/// * At each nest level, code between a surrounding loop and its nested loop
|
||||
/// is hoisted into the loop body, and such code will be executed more
|
||||
/// often than before collapsing (or not at all if any inner loop iteration
|
||||
/// has a trip count of 0). This is permitted by the OpenMP specification.
|
||||
///
|
||||
/// \param DL Debug location for instructions added for collapsing,
|
||||
/// such as instructions to compute derive the input loop's
|
||||
/// induction variables.
|
||||
/// \param Loops Loops in the loop nest to collapse. Loops are specified
|
||||
/// from outermost-to-innermost and every control flow of a
|
||||
/// loop's body must pass through its directly nested loop.
|
||||
/// \param ComputeIP Where additional instruction that compute the collapsed
|
||||
/// trip count. If not set, defaults to before the generated
|
||||
/// loop.
|
||||
///
|
||||
/// \returns The CanonicalLoopInfo object representing the collapsed loop.
|
||||
CanonicalLoopInfo *collapseLoops(DebugLoc DL,
|
||||
ArrayRef<CanonicalLoopInfo *> Loops,
|
||||
InsertPointTy ComputeIP);
|
||||
|
||||
/// Modifies the canonical loop to be a statically-scheduled workshare loop.
|
||||
///
|
||||
/// This takes a \p LoopInfo representing a canonical loop, such as the one
|
||||
|
@ -1225,6 +1225,127 @@ static void removeUnusedBlocksFromParent(ArrayRef<BasicBlock *> BBs) {
|
||||
DeleteDeadBlocks(BBVec);
|
||||
}
|
||||
|
||||
CanonicalLoopInfo *
|
||||
OpenMPIRBuilder::collapseLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
|
||||
InsertPointTy ComputeIP) {
|
||||
assert(Loops.size() >= 1 && "At least one loop required");
|
||||
size_t NumLoops = Loops.size();
|
||||
|
||||
// Nothing to do if there is already just one loop.
|
||||
if (NumLoops == 1)
|
||||
return Loops.front();
|
||||
|
||||
CanonicalLoopInfo *Outermost = Loops.front();
|
||||
CanonicalLoopInfo *Innermost = Loops.back();
|
||||
BasicBlock *OrigPreheader = Outermost->getPreheader();
|
||||
BasicBlock *OrigAfter = Outermost->getAfter();
|
||||
Function *F = OrigPreheader->getParent();
|
||||
|
||||
// Setup the IRBuilder for inserting the trip count computation.
|
||||
Builder.SetCurrentDebugLocation(DL);
|
||||
if (ComputeIP.isSet())
|
||||
Builder.restoreIP(ComputeIP);
|
||||
else
|
||||
Builder.restoreIP(Outermost->getPreheaderIP());
|
||||
|
||||
// Derive the collapsed' loop trip count.
|
||||
// TODO: Find common/largest indvar type.
|
||||
Value *CollapsedTripCount = nullptr;
|
||||
for (CanonicalLoopInfo *L : Loops) {
|
||||
Value *OrigTripCount = L->getTripCount();
|
||||
if (!CollapsedTripCount) {
|
||||
CollapsedTripCount = OrigTripCount;
|
||||
continue;
|
||||
}
|
||||
|
||||
// TODO: Enable UndefinedSanitizer to diagnose an overflow here.
|
||||
CollapsedTripCount = Builder.CreateMul(CollapsedTripCount, OrigTripCount,
|
||||
{}, /*HasNUW=*/true);
|
||||
}
|
||||
|
||||
// Create the collapsed loop control flow.
|
||||
CanonicalLoopInfo *Result =
|
||||
createLoopSkeleton(DL, CollapsedTripCount, F,
|
||||
OrigPreheader->getNextNode(), OrigAfter, "collapsed");
|
||||
|
||||
// Build the collapsed loop body code.
|
||||
// Start with deriving the input loop induction variables from the collapsed
|
||||
// one, using a divmod scheme. To preserve the original loops' order, the
|
||||
// innermost loop use the least significant bits.
|
||||
Builder.restoreIP(Result->getBodyIP());
|
||||
|
||||
Value *Leftover = Result->getIndVar();
|
||||
SmallVector<Value *> NewIndVars;
|
||||
NewIndVars.set_size(NumLoops);
|
||||
for (int i = NumLoops - 1; i >= 1; --i) {
|
||||
Value *OrigTripCount = Loops[i]->getTripCount();
|
||||
|
||||
Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount);
|
||||
NewIndVars[i] = NewIndVar;
|
||||
|
||||
Leftover = Builder.CreateUDiv(Leftover, OrigTripCount);
|
||||
}
|
||||
// Outermost loop gets all the remaining bits.
|
||||
NewIndVars[0] = Leftover;
|
||||
|
||||
// Construct the loop body control flow.
|
||||
// We progressively construct the branch structure following in direction of
|
||||
// the control flow, from the leading in-between code, the loop nest body, the
|
||||
// trailing in-between code, and rejoining the collapsed loop's latch.
|
||||
// ContinueBlock and ContinuePred keep track of the source(s) of next edge. If
|
||||
// the ContinueBlock is set, continue with that block. If ContinuePred, use
|
||||
// its predecessors as sources.
|
||||
BasicBlock *ContinueBlock = Result->getBody();
|
||||
BasicBlock *ContinuePred = nullptr;
|
||||
auto ContinueWith = [&ContinueBlock, &ContinuePred, DL](BasicBlock *Dest,
|
||||
BasicBlock *NextSrc) {
|
||||
if (ContinueBlock)
|
||||
redirectTo(ContinueBlock, Dest, DL);
|
||||
else
|
||||
redirectAllPredecessorsTo(ContinuePred, Dest, DL);
|
||||
|
||||
ContinueBlock = nullptr;
|
||||
ContinuePred = NextSrc;
|
||||
};
|
||||
|
||||
// The code before the nested loop of each level.
|
||||
// Because we are sinking it into the nest, it will be executed more often
|
||||
// that the original loop. More sophisticated schemes could keep track of what
|
||||
// the in-between code is and instantiate it only once per thread.
|
||||
for (size_t i = 0; i < NumLoops - 1; ++i)
|
||||
ContinueWith(Loops[i]->getBody(), Loops[i + 1]->getHeader());
|
||||
|
||||
// Connect the loop nest body.
|
||||
ContinueWith(Innermost->getBody(), Innermost->getLatch());
|
||||
|
||||
// The code after the nested loop at each level.
|
||||
for (size_t i = NumLoops - 1; i > 0; --i)
|
||||
ContinueWith(Loops[i]->getAfter(), Loops[i - 1]->getLatch());
|
||||
|
||||
// Connect the finished loop to the collapsed loop latch.
|
||||
ContinueWith(Result->getLatch(), nullptr);
|
||||
|
||||
// Replace the input loops with the new collapsed loop.
|
||||
redirectTo(Outermost->getPreheader(), Result->getPreheader(), DL);
|
||||
redirectTo(Result->getAfter(), Outermost->getAfter(), DL);
|
||||
|
||||
// Replace the input loop indvars with the derived ones.
|
||||
for (size_t i = 0; i < NumLoops; ++i)
|
||||
Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
|
||||
|
||||
// Remove unused parts of the input loops.
|
||||
SmallVector<BasicBlock *, 12> OldControlBBs;
|
||||
OldControlBBs.reserve(6 * Loops.size());
|
||||
for (CanonicalLoopInfo *Loop : Loops)
|
||||
Loop->collectControlBlocks(OldControlBBs);
|
||||
removeUnusedBlocksFromParent(OldControlBBs);
|
||||
|
||||
#ifndef NDEBUG
|
||||
Result->assertOK();
|
||||
#endif
|
||||
return Result;
|
||||
}
|
||||
|
||||
std::vector<CanonicalLoopInfo *>
|
||||
OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
|
||||
ArrayRef<Value *> TileSizes) {
|
||||
|
@ -1160,6 +1160,99 @@ TEST_F(OpenMPIRBuilderTest, CanonicalLoopBounds) {
|
||||
EXPECT_FALSE(verifyModule(*M, &errs()));
|
||||
}
|
||||
|
||||
TEST_F(OpenMPIRBuilderTest, CollapseNestedLoops) {
|
||||
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
|
||||
OpenMPIRBuilder OMPBuilder(*M);
|
||||
OMPBuilder.initialize();
|
||||
F->setName("func");
|
||||
|
||||
IRBuilder<> Builder(BB);
|
||||
|
||||
Type *LCTy = F->getArg(0)->getType();
|
||||
Constant *One = ConstantInt::get(LCTy, 1);
|
||||
Constant *Two = ConstantInt::get(LCTy, 2);
|
||||
Value *OuterTripCount =
|
||||
Builder.CreateAdd(F->getArg(0), Two, "tripcount.outer");
|
||||
Value *InnerTripCount =
|
||||
Builder.CreateAdd(F->getArg(0), One, "tripcount.inner");
|
||||
|
||||
// Fix an insertion point for ComputeIP.
|
||||
BasicBlock *LoopNextEnter =
|
||||
BasicBlock::Create(M->getContext(), "loopnest.enter", F,
|
||||
Builder.GetInsertBlock()->getNextNode());
|
||||
BranchInst *EnterBr = Builder.CreateBr(LoopNextEnter);
|
||||
InsertPointTy ComputeIP{EnterBr->getParent(), EnterBr->getIterator()};
|
||||
|
||||
Builder.SetInsertPoint(LoopNextEnter);
|
||||
OpenMPIRBuilder::LocationDescription OuterLoc(Builder.saveIP(), DL);
|
||||
|
||||
CanonicalLoopInfo *InnerLoop = nullptr;
|
||||
CallInst *InbetweenLead = nullptr;
|
||||
CallInst *InbetweenTrail = nullptr;
|
||||
CallInst *Call = nullptr;
|
||||
auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP, Value *OuterLC) {
|
||||
Builder.restoreIP(OuterCodeGenIP);
|
||||
InbetweenLead =
|
||||
createPrintfCall(Builder, "In-between lead i=%d\\n", {OuterLC});
|
||||
|
||||
auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
|
||||
Value *InnerLC) {
|
||||
Builder.restoreIP(InnerCodeGenIP);
|
||||
Call = createPrintfCall(Builder, "body i=%d j=%d\\n", {OuterLC, InnerLC});
|
||||
};
|
||||
InnerLoop = OMPBuilder.createCanonicalLoop(
|
||||
Builder.saveIP(), InnerLoopBodyGenCB, InnerTripCount, "inner");
|
||||
|
||||
Builder.restoreIP(InnerLoop->getAfterIP());
|
||||
InbetweenTrail =
|
||||
createPrintfCall(Builder, "In-between trail i=%d\\n", {OuterLC});
|
||||
};
|
||||
CanonicalLoopInfo *OuterLoop = OMPBuilder.createCanonicalLoop(
|
||||
OuterLoc, OuterLoopBodyGenCB, OuterTripCount, "outer");
|
||||
|
||||
// Finish the function.
|
||||
Builder.restoreIP(OuterLoop->getAfterIP());
|
||||
Builder.CreateRetVoid();
|
||||
|
||||
CanonicalLoopInfo *Collapsed =
|
||||
OMPBuilder.collapseLoops(DL, {OuterLoop, InnerLoop}, ComputeIP);
|
||||
|
||||
OMPBuilder.finalize();
|
||||
EXPECT_FALSE(verifyModule(*M, &errs()));
|
||||
|
||||
// Verify control flow and BB order.
|
||||
BasicBlock *RefOrder[] = {
|
||||
Collapsed->getPreheader(), Collapsed->getHeader(),
|
||||
Collapsed->getCond(), Collapsed->getBody(),
|
||||
InbetweenLead->getParent(), Call->getParent(),
|
||||
InbetweenTrail->getParent(), Collapsed->getLatch(),
|
||||
Collapsed->getExit(), Collapsed->getAfter(),
|
||||
};
|
||||
EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
|
||||
EXPECT_TRUE(verifyListOrder(F, RefOrder));
|
||||
|
||||
// Verify the total trip count.
|
||||
auto *TripCount = cast<MulOperator>(Collapsed->getTripCount());
|
||||
EXPECT_EQ(TripCount->getOperand(0), OuterTripCount);
|
||||
EXPECT_EQ(TripCount->getOperand(1), InnerTripCount);
|
||||
|
||||
// Verify the changed indvar.
|
||||
auto *OuterIV = cast<BinaryOperator>(Call->getOperand(1));
|
||||
EXPECT_EQ(OuterIV->getOpcode(), Instruction::UDiv);
|
||||
EXPECT_EQ(OuterIV->getParent(), Collapsed->getBody());
|
||||
EXPECT_EQ(OuterIV->getOperand(1), InnerTripCount);
|
||||
EXPECT_EQ(OuterIV->getOperand(0), Collapsed->getIndVar());
|
||||
|
||||
auto *InnerIV = cast<BinaryOperator>(Call->getOperand(2));
|
||||
EXPECT_EQ(InnerIV->getOpcode(), Instruction::URem);
|
||||
EXPECT_EQ(InnerIV->getParent(), Collapsed->getBody());
|
||||
EXPECT_EQ(InnerIV->getOperand(0), Collapsed->getIndVar());
|
||||
EXPECT_EQ(InnerIV->getOperand(1), InnerTripCount);
|
||||
|
||||
EXPECT_EQ(InbetweenLead->getOperand(1), OuterIV);
|
||||
EXPECT_EQ(InbetweenTrail->getOperand(1), OuterIV);
|
||||
}
|
||||
|
||||
TEST_F(OpenMPIRBuilderTest, TileSingleLoop) {
|
||||
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
|
||||
OpenMPIRBuilder OMPBuilder(*M);
|
||||
|
Loading…
Reference in New Issue
Block a user