From 4e50d0475acca729b5f4049022829d1ae5efb9f0 Mon Sep 17 00:00:00 2001 From: James Y Knight Date: Fri, 22 Mar 2019 18:27:13 +0000 Subject: [PATCH] IR: Support parsing numeric block ids, and emit them in textual output. Just as llvm IR supports explicitly specifying numeric value ids for instructions, and emits them by default in textual output, now do the same for blocks. This is a slightly incompatible change in the textual IR format. Previously, llvm would parse numeric labels as string names. E.g. define void @f() { br label %"55" 55: ret void } defined a label *named* "55", even without needing to be quoted, while the reference required quoting. Now, if you intend a block label which looks like a value number to be a name, you must quote it in the definition too (e.g. `"55":`). Previously, llvm would print nameless blocks only as a comment, and would omit it if there was no predecessor. This could cause confusion for readers of the IR, just as unnamed instructions did prior to the addition of "%5 = " syntax, back in 2008 (PR2480). Now, it will always print a label for an unnamed block, with the exception of the entry block. (IMO it may be better to print it for the entry-block as well. However, that requires updating many more tests.) Thus, the following is supported, and is the canonical printing: define i32 @f(i32, i32) { %3 = add i32 %0, %1 br label %4 4: ret i32 %3 } New test cases covering this behavior are added, and other tests updated as required. 
Differential Revision: https://reviews.llvm.org/D58548 llvm-svn: 356789 --- docs/LangRef.rst | 12 +-- lib/AsmParser/LLLexer.cpp | 12 ++- lib/AsmParser/LLParser.cpp | 33 +++++-- lib/AsmParser/LLParser.h | 2 +- lib/AsmParser/LLToken.h | 1 + lib/IR/AsmWriter.cpp | 9 +- .../Analysis/DominanceFrontier/new_pm_test.ll | 58 ++++++------- test/Analysis/RegionInfo/cond_loop.ll | 12 +-- .../RegionInfo/condition_forward_edge.ll | 8 +- .../RegionInfo/condition_same_exit.ll | 10 +-- test/Analysis/RegionInfo/condition_simple.ll | 10 +-- test/Analysis/RegionInfo/infinite_loop.ll | 10 +-- test/Analysis/RegionInfo/infinite_loop_2.ll | 18 ++-- test/Analysis/RegionInfo/infinite_loop_3.ll | 30 +++---- test/Analysis/RegionInfo/infinite_loop_4.ll | 32 +++---- test/Analysis/RegionInfo/infinite_loop_5_a.ll | 14 +-- test/Analysis/RegionInfo/infinite_loop_5_b.ll | 16 ++-- test/Analysis/RegionInfo/infinite_loop_5_c.ll | 14 +-- .../RegionInfo/loop_with_condition.ll | 20 ++--- test/Analysis/RegionInfo/mix_1.ll | 38 ++++---- test/Analysis/RegionInfo/paper.ll | 28 +++--- test/Assembler/block-labels.ll | 48 +++++++++++ test/Assembler/invalid-block-label-num.ll | 7 ++ test/CodeGen/X86/atomic-pointer.ll | 6 +- .../asan-masked-load-store.ll | 32 +++---- .../stack-poisoning-and-lifetime-be.ll | 6 +- .../stack-poisoning-and-lifetime.ll | 6 +- .../AddressSanitizer/stack_dynamic_alloca.ll | 6 +- .../MemorySanitizer/check_access_address.ll | 4 +- .../MemorySanitizer/msan_basic.ll | 8 +- .../MemorySanitizer/msan_kernel_basic.ll | 86 +++++++++---------- .../MemorySanitizer/msan_x86_bts_asm.ll | 2 +- .../MemorySanitizer/store-origin.ll | 4 +- .../SanitizerCoverage/stack-depth.ll | 2 +- test/Transforms/GVNHoist/pr36787.ll | 26 +++--- .../LowerSwitch/2014-06-23-PHIlowering.ll | 14 +-- 36 files changed, 365 insertions(+), 279 deletions(-) create mode 100644 test/Assembler/block-labels.ll create mode 100644 test/Assembler/invalid-block-label-num.ll diff --git a/docs/LangRef.rst b/docs/LangRef.rst index 
a43b0ee3c45..66490b85834 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -741,11 +741,13 @@ A function definition contains a list of basic blocks, forming the CFG (Control Flow Graph) for the function. Each basic block may optionally start with a label (giving the basic block a symbol table entry), contains a list of instructions, and ends with a :ref:`terminator ` instruction (such as a branch or -function return). If an explicit label is not provided, a block is assigned an -implicit numbered label, using the next value from the same counter as used for -unnamed temporaries (:ref:`see above`). For example, if a function -entry block does not have an explicit label, it will be assigned label "%0", -then the first unnamed temporary in that block will be "%1", etc. +function return). If an explicit label name is not provided, a block is assigned +an implicit numbered label, using the next value from the same counter as used +for unnamed temporaries (:ref:`see above`). For example, if a +function entry block does not have an explicit label, it will be assigned label +"%0", then the first unnamed temporary in that block will be "%1", etc. If a +numeric label is explicitly specified, it must match the numeric label that +would be used implicitly. The first basic block in a function is special in two ways: it is immediately executed on entrance to the function, and it is not allowed diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 9e5bfb1f72b..bc3776da966 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -1048,7 +1048,17 @@ lltok::Kind LLLexer::LexDigitOrNegative() { for (; isdigit(static_cast(CurPtr[0])); ++CurPtr) /*empty*/; - // Check to see if this really is a label afterall, e.g. "-1:". + // Check if this is a fully-numeric label: + if (isdigit(TokStart[0]) && CurPtr[0] == ':') { + uint64_t Val = atoull(TokStart, CurPtr); + ++CurPtr; // Skip the colon. 
+ if ((unsigned)Val != Val) + Error("invalid value number (too large)!"); + UIntVal = unsigned(Val); + return lltok::LabelID; + } + + // Check to see if this really is a string label, e.g. "-1:". if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') { if (const char *End = isLabelTail(CurPtr)) { StrVal.assign(TokStart, End-1); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 71315f3c0fb..1ab2702941d 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -2926,13 +2926,27 @@ BasicBlock *LLParser::PerFunctionState::GetBB(unsigned ID, LocTy Loc) { /// unnamed. If there is an error, this returns null otherwise it returns /// the block being defined. BasicBlock *LLParser::PerFunctionState::DefineBB(const std::string &Name, - LocTy Loc) { + int NameID, LocTy Loc) { BasicBlock *BB; - if (Name.empty()) + if (Name.empty()) { + if (NameID != -1 && unsigned(NameID) != NumberedVals.size()) { + P.Error(Loc, "label expected to be numbered '" + + Twine(NumberedVals.size()) + "'"); + return nullptr; + } BB = GetBB(NumberedVals.size(), Loc); - else + if (!BB) { + P.Error(Loc, "unable to create block numbered '" + + Twine(NumberedVals.size()) + "'"); + return nullptr; + } + } else { BB = GetBB(Name, Loc); - if (!BB) return nullptr; // Already diagnosed error. + if (!BB) { + P.Error(Loc, "unable to create block named '" + Name + "'"); + return nullptr; + } + } // Move the block to the end of the function. Forward ref'd blocks are // inserted wherever they happen to be referenced. @@ -5489,20 +5503,23 @@ bool LLParser::ParseFunctionBody(Function &Fn) { } /// ParseBasicBlock -/// ::= LabelStr? Instruction* +/// ::= (LabelStr|LabelID)? Instruction* bool LLParser::ParseBasicBlock(PerFunctionState &PFS) { // If this basic block starts out with a name, remember it. 
std::string Name; + int NameID = -1; LocTy NameLoc = Lex.getLoc(); if (Lex.getKind() == lltok::LabelStr) { Name = Lex.getStrVal(); Lex.Lex(); + } else if (Lex.getKind() == lltok::LabelID) { + NameID = Lex.getUIntVal(); + Lex.Lex(); } - BasicBlock *BB = PFS.DefineBB(Name, NameLoc); + BasicBlock *BB = PFS.DefineBB(Name, NameID, NameLoc); if (!BB) - return Error(NameLoc, - "unable to create block named '" + Name + "'"); + return true; std::string NameStr; diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index 95aea0c775a..452492b862a 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -445,7 +445,7 @@ namespace llvm { /// DefineBB - Define the specified basic block, which is either named or /// unnamed. If there is an error, this returns null otherwise it returns /// the block being defined. - BasicBlock *DefineBB(const std::string &Name, LocTy Loc); + BasicBlock *DefineBB(const std::string &Name, int NameID, LocTy Loc); bool resolveForwardRefBlockAddresses(); }; diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 50bdf05092a..80a1eb99e35 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -422,6 +422,7 @@ enum Kind { kw_varFlags, // Unsigned Valued tokens (UIntVal). + LabelID, // 42: GlobalID, // @42 LocalVarID, // %42 AttrGrpID, // #42 diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index 07dea403d56..ad6c2c8e883 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -3482,23 +3482,24 @@ void AssemblyWriter::printArgument(const Argument *Arg, AttributeSet Attrs) { /// printBasicBlock - This member is called for each basic block in a method. void AssemblyWriter::printBasicBlock(const BasicBlock *BB) { + bool IsEntryBlock = BB == &BB->getParent()->getEntryBlock(); if (BB->hasName()) { // Print out the label if it exists... Out << "\n"; PrintLLVMName(Out, BB->getName(), LabelPrefix); Out << ':'; - } else if (!BB->use_empty()) { // Don't print block # of no uses... - Out << "\n;