From 45b8bf3b33e58e902436f8aaaee6c165e6828070 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 5 Mar 2020 14:55:08 +0000 Subject: [PATCH] [VPlan] Use consecutive numbers to print VPValues instead of addresses. Currently when printing VPValues we use the object address, which makes it hard to distinguish VPValues, because addresses are usually large numbers with varying distances between them. This patch adds a simple slot tracker, similar to the ModuleSlotTracker used for IR values. In order to dump a VPValue or anything containing a VPValue, a slot tracker for the enclosing VPlan needs to be created. The existing VPlanPrinter can take care of that for the existing code. We assign consecutive numbers to each VPValue we encounter in a reverse post-order traversal of the VPlan. Reviewers: rengolin, hsaito, fhahn, Ayal, dorit, gilr Reviewed By: gilr Differential Revision: https://reviews.llvm.org/D73078 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 7 +- lib/Transforms/Vectorize/VPlan.cpp | 134 +++++++++++++++---- lib/Transforms/Vectorize/VPlan.h | 52 ++++--- lib/Transforms/Vectorize/VPlanValue.h | 39 +++++- unittests/Transforms/Vectorize/VPlanTest.cpp | 71 ++++++++++ 5 files changed, 253 insertions(+), 50 deletions(-) diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index ffee9d3160e..49e38c37a46 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7347,16 +7347,17 @@ Value *LoopVectorizationPlanner::VPCallbackILV::getOrCreateScalarValue( return ILV.getOrCreateScalarValue(V, Instance); } -void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent) const { +void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { O << " +\n" << Indent << "\"INTERLEAVE-GROUP with factor " << IG->getFactor() << " at "; IG->getInsertPos()->printAsOperand(O, false); O << ", "; - getAddr()->printAsOperand(O); +
getAddr()->printAsOperand(O, SlotTracker); VPValue *Mask = getMask(); if (Mask) { O << ", "; - Mask->printAsOperand(O); + Mask->printAsOperand(O, SlotTracker); } O << "\\l\""; for (unsigned i = 0; i < IG->getFactor(); ++i) diff --git a/lib/Transforms/Vectorize/VPlan.cpp b/lib/Transforms/Vectorize/VPlan.cpp index 02150f8c2fb..45be91baec8 100644 --- a/lib/Transforms/Vectorize/VPlan.cpp +++ b/lib/Transforms/Vectorize/VPlan.cpp @@ -49,13 +49,20 @@ extern cl::opt EnableVPlanNativePath; #define DEBUG_TYPE "vplan" raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) { - if (const VPInstruction *Instr = dyn_cast(&V)) - Instr->print(OS); - else - V.printAsOperand(OS); + const VPInstruction *Instr = dyn_cast(&V); + VPSlotTracker SlotTracker( + (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr); + V.print(OS, SlotTracker); return OS; } +void VPValue::print(raw_ostream &OS, VPSlotTracker &SlotTracker) const { + if (const VPInstruction *Instr = dyn_cast(this)) + Instr->print(OS, SlotTracker); + else + printAsOperand(OS, SlotTracker); +} + // Get the top-most entry block of \p Start. This is the entry block of the // containing VPlan. 
This function is templated to support both const and non-const blocks template static T *getPlanEntry(T *Start) { @@ -384,14 +391,20 @@ void VPInstruction::execute(VPTransformState &State) { generateInstruction(State, Part); } -void VPInstruction::print(raw_ostream &O, const Twine &Indent) const { +void VPInstruction::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { O << " +\n" << Indent << "\"EMIT "; - print(O); + print(O, SlotTracker); O << "\\l\""; } void VPInstruction::print(raw_ostream &O) const { - printAsOperand(O); + VPSlotTracker SlotTracker(getParent()->getPlan()); + print(O, SlotTracker); +} + +void VPInstruction::print(raw_ostream &O, VPSlotTracker &SlotTracker) const { + printAsOperand(O, SlotTracker); O << " = "; switch (getOpcode()) { @@ -413,7 +426,7 @@ void VPInstruction::print(raw_ostream &O) const { for (const VPValue *Operand : operands()) { O << " "; - Operand->printAsOperand(O); + Operand->printAsOperand(O, SlotTracker); } } @@ -567,10 +580,14 @@ void VPlanPrinter::dump() { OS << "\\n" << DOT::EscapeString(Plan.getName()); if (!Plan.Value2VPValue.empty() || Plan.BackedgeTakenCount) { OS << ", where:"; - if (Plan.BackedgeTakenCount) - OS << "\\n" << *Plan.BackedgeTakenCount << " := BackedgeTakenCount"; + if (Plan.BackedgeTakenCount) { + OS << "\\n"; + Plan.BackedgeTakenCount->print(OS, SlotTracker); + OS << " := BackedgeTakenCount"; + } for (auto Entry : Plan.Value2VPValue) { - OS << "\\n" << *Entry.second; + OS << "\\n"; + Entry.second->print(OS, SlotTracker); OS << DOT::EscapeString(" := "); Entry.first->printAsOperand(OS, false); } @@ -637,25 +654,25 @@ void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) { if (Pred) { OS << " +\n" << Indent << " \"BlockPredicate: "; if (const VPInstruction *PredI = dyn_cast(Pred)) { - PredI->printAsOperand(OS); + PredI->printAsOperand(OS, SlotTracker); OS << " (" << DOT::EscapeString(PredI->getParent()->getName()) << ")\\l\""; } else - Pred->printAsOperand(OS); + 
Pred->printAsOperand(OS, SlotTracker); } for (const VPRecipeBase &Recipe : *BasicBlock) - Recipe.print(OS, Indent); + Recipe.print(OS, Indent, SlotTracker); // Dump the condition bit. const VPValue *CBV = BasicBlock->getCondBit(); if (CBV) { OS << " +\n" << Indent << " \"CondBit: "; if (const VPInstruction *CBI = dyn_cast(CBV)) { - CBI->printAsOperand(OS); + CBI->printAsOperand(OS, SlotTracker); OS << " (" << DOT::EscapeString(CBI->getParent()->getName()) << ")\\l\""; } else { - CBV->printAsOperand(OS); + CBV->printAsOperand(OS, SlotTracker); OS << "\""; } } @@ -702,14 +719,15 @@ void VPlanPrinter::printAsIngredient(raw_ostream &O, Value *V) { O << DOT::EscapeString(IngredientString); } -void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent) const { +void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { O << " +\n" << Indent << "\"WIDEN\\l\""; for (auto &Instr : make_range(Begin, End)) O << " +\n" << Indent << "\" " << VPlanIngredient(&Instr) << "\\l\""; } -void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, - const Twine &Indent) const { +void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { O << " +\n" << Indent << "\"WIDEN-INDUCTION"; if (Trunc) { O << "\\l\""; @@ -719,7 +737,8 @@ void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, O << " " << VPlanIngredient(IV) << "\\l\""; } -void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent) const { +void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { O << " +\n" << Indent << "\"WIDEN-GEP "; O << (IsPtrLoopInvariant ? 
"Inv" : "Var"); size_t IndicesNumber = IsIndexLoopInvariant.size(); @@ -729,11 +748,13 @@ void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent) const { O << " +\n" << Indent << "\" " << VPlanIngredient(GEP) << "\\l\""; } -void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent) const { +void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { O << " +\n" << Indent << "\"WIDEN-PHI " << VPlanIngredient(Phi) << "\\l\""; } -void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent) const { +void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { O << " +\n" << Indent << "\"BLEND "; Phi->printAsOperand(O, false); O << " ="; @@ -747,13 +768,14 @@ void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent) const { O << " "; Phi->getIncomingValue(I)->printAsOperand(O, false); O << "/"; - User->getOperand(I)->printAsOperand(O); + User->getOperand(I)->printAsOperand(O, SlotTracker); } } O << "\\l\""; } -void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent) const { +void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { O << " +\n" << Indent << "\"" << (IsUniform ? 
"CLONE " : "REPLICATE ") << VPlanIngredient(Ingredient); @@ -762,21 +784,22 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent) const { O << "\\l\""; } -void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent) const { +void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { O << " +\n" << Indent << "\"PHI-PREDICATED-INSTRUCTION " << VPlanIngredient(PredInst) << "\\l\""; } -void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, - const Twine &Indent) const { +void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { O << " +\n" << Indent << "\"WIDEN " << VPlanIngredient(&Instr); O << ", "; - getAddr()->printAsOperand(O); + getAddr()->printAsOperand(O, SlotTracker); VPValue *Mask = getMask(); if (Mask) { O << ", "; - Mask->printAsOperand(O); + Mask->printAsOperand(O, SlotTracker); } O << "\\l\""; } @@ -790,6 +813,14 @@ void VPValue::replaceAllUsesWith(VPValue *New) { User->setOperand(I, New); } +void VPValue::printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const { + unsigned Slot = Tracker.getSlot(this); + if (Slot == unsigned(-1)) + OS << ""; + else + OS << "%vp" << Tracker.getSlot(this); +} + void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New, InterleavedAccessInfo &IAI) { @@ -835,3 +866,48 @@ VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan, Old2NewTy Old2New; visitRegion(cast(Plan.getEntry()), Old2New, IAI); } + +void VPSlotTracker::assignSlot(const VPValue *V) { + assert(Slots.find(V) == Slots.end() && "VPValue already has a slot!"); + Slots[V] = NextSlot++; +} + +void VPSlotTracker::assignSlots(const VPBlockBase *VPBB) { + if (auto *Region = dyn_cast(VPBB)) + assignSlots(Region); + else + assignSlots(cast(VPBB)); +} + +void VPSlotTracker::assignSlots(const VPRegionBlock *Region) { + ReversePostOrderTraversal RPOT(Region->getEntry()); + for (const VPBlockBase *Block : 
RPOT) + assignSlots(Block); +} + +void VPSlotTracker::assignSlots(const VPBasicBlock *VPBB) { + for (const VPRecipeBase &Recipe : *VPBB) { + if (const auto *VPI = dyn_cast(&Recipe)) + assignSlot(VPI); + } +} + +void VPSlotTracker::assignSlots(const VPlan &Plan) { + + for (const VPValue *V : Plan.VPExternalDefs) + assignSlot(V); + + for (auto &E : Plan.Value2VPValue) + if (!isa(E.second)) + assignSlot(E.second); + + for (const VPValue *V : Plan.VPCBVs) + assignSlot(V); + + if (Plan.BackedgeTakenCount) + assignSlot(Plan.BackedgeTakenCount); + + ReversePostOrderTraversal RPOT(Plan.getEntry()); + for (const VPBlockBase *Block : RPOT) + assignSlots(Block); +} diff --git a/lib/Transforms/Vectorize/VPlan.h b/lib/Transforms/Vectorize/VPlan.h index 914dfe603c5..3cd4464c1ef 100644 --- a/lib/Transforms/Vectorize/VPlan.h +++ b/lib/Transforms/Vectorize/VPlan.h @@ -633,7 +633,9 @@ public: virtual void execute(struct VPTransformState &State) = 0; /// Each recipe prints itself. - virtual void print(raw_ostream &O, const Twine &Indent) const = 0; + void print(raw_ostream &O, const Twine &Indent); + virtual void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const = 0; /// Insert an unlinked recipe into a basic block immediately before /// the specified recipe. @@ -719,10 +721,12 @@ public: void execute(VPTransformState &State) override; /// Print the Recipe. - void print(raw_ostream &O, const Twine &Indent) const override; + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; /// Print the VPInstruction. void print(raw_ostream &O) const; + void print(raw_ostream &O, VPSlotTracker &SlotTracker) const; /// Return true if this instruction may modify memory. bool mayWriteToMemory() const { @@ -768,7 +772,8 @@ public: } /// Print the recipe. 
- void print(raw_ostream &O, const Twine &Indent) const override; + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; }; /// A recipe for handling GEP instructions. @@ -798,7 +803,8 @@ public: void execute(VPTransformState &State) override; /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent) const override; + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; }; /// A recipe for handling phi nodes of integer and floating-point inductions, @@ -823,7 +829,8 @@ public: void execute(VPTransformState &State) override; /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent) const override; + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; }; /// A recipe for handling all phi nodes except for integer and FP inductions. @@ -844,7 +851,8 @@ public: void execute(VPTransformState &State) override; /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent) const override; + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; }; /// A recipe for vectorizing a phi-node as a sequence of mask-based select @@ -875,7 +883,8 @@ public: void execute(VPTransformState &State) override; /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent) const override; + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; }; /// VPInterleaveRecipe is a recipe for transforming an interleave group of load @@ -915,7 +924,8 @@ public: void execute(VPTransformState &State) override; /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent) const override; + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; const InterleaveGroup *getInterleaveGroup() { return IG; } }; @@ -965,7 +975,8 @@ public: void setAlsoPack(bool Pack) { AlsoPack = Pack; } /// Print the recipe. 
- void print(raw_ostream &O, const Twine &Indent) const override; + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; }; /// A recipe for generating conditional branches on the bits of a mask. @@ -989,10 +1000,11 @@ public: void execute(VPTransformState &State) override; /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent) const override { + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override { O << " +\n" << Indent << "\"BRANCH-ON-MASK "; if (User) - O << *User->getOperand(0); + User->getOperand(0)->print(O, SlotTracker); else O << " All-One"; O << "\\l\""; @@ -1024,7 +1036,8 @@ public: void execute(VPTransformState &State) override; /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent) const override; + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; }; /// A Recipe for widening load/store operations. @@ -1064,7 +1077,8 @@ public: void execute(VPTransformState &State) override; /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent) const override; + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; }; /// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It @@ -1352,6 +1366,7 @@ struct GraphTraits> } }; +class VPSlotTracker; /// VPlan models a candidate for vectorization, encoding various decisions take /// to produce efficient output IR, including which branches, basic-blocks and /// output IR instructions to generate, and their cost. VPlan holds a @@ -1359,6 +1374,7 @@ struct GraphTraits> /// VPBlock. class VPlan { friend class VPlanPrinter; + friend class VPSlotTracker; private: /// Hold the single entry to the Hierarchical CFG of the VPlan. 
@@ -1392,7 +1408,10 @@ private: SmallVector VPCBVs; public: - VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {} + VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) { + if (Entry) + Entry->setPlan(this); + } ~VPlan() { if (Entry) @@ -1496,7 +1515,10 @@ private: unsigned BID = 0; SmallDenseMap BlockID; - VPlanPrinter(raw_ostream &O, const VPlan &P) : OS(O), Plan(P) {} + VPSlotTracker SlotTracker; + + VPlanPrinter(raw_ostream &O, const VPlan &P) + : OS(O), Plan(P), SlotTracker(&P) {} /// Handle indentation. void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); } diff --git a/lib/Transforms/Vectorize/VPlanValue.h b/lib/Transforms/Vectorize/VPlanValue.h index 464498c29d8..9004650fac8 100644 --- a/lib/Transforms/Vectorize/VPlanValue.h +++ b/lib/Transforms/Vectorize/VPlanValue.h @@ -31,6 +31,8 @@ namespace llvm { // Forward declarations. class VPUser; +class VPSlotTracker; + // This is the base class of the VPlan Def/Use graph, used for modeling the data // flow into, within and out of the VPlan. VPValues can stand for live-ins // coming from the input IR, instructions which VPlan will generate if executed @@ -85,9 +87,8 @@ public: /// for any other purpose, as the values may change as LLVM evolves. unsigned getVPValueID() const { return SubclassID; } - void printAsOperand(raw_ostream &OS) const { - OS << "%vp" << (unsigned short)(unsigned long long)this; - } + void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const; + void print(raw_ostream &OS, VPSlotTracker &Tracker) const; unsigned getNumUsers() const { return Users.size(); } void addUser(VPUser &User) { Users.push_back(&User); } @@ -180,6 +181,38 @@ public: return const_operand_range(op_begin(), op_end()); } }; +class VPlan; +class VPBasicBlock; +class VPRegionBlock; + +/// This class can be used to assign consecutive numbers to all VPValues in a +/// VPlan and allows querying the numbering for printing, similar to the +/// ModuleSlotTracker for IR values. 
+class VPSlotTracker { +private: + DenseMap Slots; + unsigned NextSlot = 0; + + void assignSlots(const VPBlockBase *VPBB); + void assignSlots(const VPRegionBlock *Region); + void assignSlots(const VPBasicBlock *VPBB); + void assignSlot(const VPValue *V); + + void assignSlots(const VPlan &Plan); + +public: + VPSlotTracker(const VPlan *Plan) { + if (Plan) + assignSlots(*Plan); + } + + unsigned getSlot(const VPValue *V) const { + auto I = Slots.find(V); + if (I == Slots.end()) + return -1; + return I->second; + } +}; } // namespace llvm diff --git a/unittests/Transforms/Vectorize/VPlanTest.cpp b/unittests/Transforms/Vectorize/VPlanTest.cpp index 855016a1248..61df1f6288c 100644 --- a/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -11,6 +11,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "gtest/gtest.h" +#include namespace llvm { namespace { @@ -176,5 +177,75 @@ TEST(VPBasicBlockTest, getPlan) { } } +TEST(VPBasicBlockTest, print) { + VPInstruction *I1 = new VPInstruction(10, {}); + VPInstruction *I2 = new VPInstruction(1, {I1}); + VPInstruction *I3 = new VPInstruction(2, {I1, I2}); + + VPBasicBlock *VPBB1 = new VPBasicBlock(); + VPBB1->appendRecipe(I1); + VPBB1->appendRecipe(I2); + VPBB1->appendRecipe(I3); + + VPInstruction *I4 = new VPInstruction(4, {I3, I2}); + VPInstruction *I5 = new VPInstruction(5, {I1}); + VPBasicBlock *VPBB2 = new VPBasicBlock(); + VPBB2->appendRecipe(I4); + VPBB2->appendRecipe(I5); + + VPBlockUtils::connectBlocks(VPBB1, VPBB2); + + // Check printing an instruction without associated VPlan. 
+ { + std::string I3Dump; + raw_string_ostream OS(I3Dump); + I3->print(OS); + OS.flush(); + EXPECT_EQ(" = br ", I3Dump); + } + + VPlan Plan; + Plan.setEntry(VPBB1); + std::string FullDump; + raw_string_ostream(FullDump) << Plan; + + EXPECT_EQ(R"(digraph VPlan { +graph [labelloc=t, fontsize=30; label="Vectorization Plan"] +node [shape=rect, fontname=Courier, fontsize=30] +edge [fontname=Courier, fontsize=30] +compound=true + N0 [label = + ":\n" + + "EMIT %vp0 = catchswitch\l" + + "EMIT %vp1 = ret %vp0\l" + + "EMIT %vp2 = br %vp0 %vp1\l" + ] + N0 -> N1 [ label=""] + N1 [label = + ":\n" + + "EMIT %vp3 = indirectbr %vp2 %vp1\l" + + "EMIT %vp4 = invoke %vp0\l" + ] +} +)", + FullDump); + + { + std::string I3Dump; + raw_string_ostream OS(I3Dump); + I3->print(OS); + OS.flush(); + EXPECT_EQ("%vp2 = br %vp0 %vp1", I3Dump); + } + + { + std::string I2Dump; + raw_string_ostream OS(I2Dump); + OS << *I2; + OS.flush(); + EXPECT_EQ("%vp1 = ret %vp0", I2Dump); + } +} + } // namespace } // namespace llvm