From c6d928b2f660fa77809d33c9373d6c9cbdeb7b8c Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Wed, 24 Oct 2018 20:23:57 +0000 Subject: [PATCH] [MC] Separate masm integer literal lexer support from inline asm Summary: This renames the IsParsingMSInlineAsm member variable of AsmLexer to LexMasmIntegers and moves it up to MCAsmLexer. This is the only behavior controlled by that variable. I added a public setter, so that it can be set from outside or from the llvm-mc command line. We may need to arrange things so that users can get this behavior from clang, but that's future work. I also put additional hex literal lexing functionality under this flag to fix PR32973. It appears that this hex literal parsing wasn't intended to be enabled in non-masm-style blocks. Now, masm integers (0b1101 and 0ABCh) work in __asm blocks from clang, but 0b label references work when using .intel_syntax in standalone .s files. However, 0b label references will *not* work from __asm blocks in clang. They will work from GCC inline asm blocks, which it sounds like is important for Crypto++ as mentioned in PR36144. Essentially, we only lex masm literals for inline asm blobs that use intel syntax. If the .intel_syntax directive is used inside a gnu-style inline asm statement, masm literals will not be lexed, which is compatible with gas and llvm-mc standalone .s assembly. This fixes PR36144 and PR32973. Reviewers: Gerolf, avt77 Subscribers: eraman, hiraditya, llvm-commits Differential Revision: https://reviews.llvm.org/D53535 llvm-svn: 345189 --- include/llvm/MC/MCParser/AsmLexer.h | 2 -- include/llvm/MC/MCParser/MCAsmLexer.h | 5 +++ .../AsmPrinter/AsmPrinterInlineAsm.cpp | 5 +-- lib/MC/MCParser/AsmLexer.cpp | 36 ++++++++++--------- lib/MC/MCParser/AsmParser.cpp | 4 ++- lib/Target/X86/AsmParser/X86AsmParser.cpp | 2 -- test/MC/AArch64/macro-hex-int.s | 8 +++++ test/MC/X86/intel-syntax-hex.s | 2 +- test/MC/X86/pr27884.s | 2 +- test/tools/llvm-mca/X86/intel-syntax.s | 2 +- tools/llvm-mc/llvm-mc.cpp | 5 +++ 11 files changed, 47 insertions(+), 26 deletions(-) create mode 100644 test/MC/AArch64/macro-hex-int.s diff --git a/include/llvm/MC/MCParser/AsmLexer.h b/include/llvm/MC/MCParser/AsmLexer.h index 207183a69b0..2e9b8dfa3b2 100644 --- a/include/llvm/MC/MCParser/AsmLexer.h +++ b/include/llvm/MC/MCParser/AsmLexer.h @@ -30,7 +30,6 @@ class AsmLexer : public MCAsmLexer { StringRef CurBuf; bool IsAtStartOfLine = true; bool IsAtStartOfStatement = true; - bool IsParsingMSInlineAsm = false; bool IsPeeking = false; protected: @@ -44,7 +43,6 @@ public: ~AsmLexer() override; void setBuffer(StringRef Buf, const char *ptr = nullptr); - void setParsingMSInlineAsm(bool V) { IsParsingMSInlineAsm = V; } StringRef LexUntilEndOfStatement() override; diff --git a/include/llvm/MC/MCParser/MCAsmLexer.h b/include/llvm/MC/MCParser/MCAsmLexer.h index 8ff0df2a185..ea13d1cdc09 100644 --- a/include/llvm/MC/MCParser/MCAsmLexer.h +++ b/include/llvm/MC/MCParser/MCAsmLexer.h @@ -50,6 +50,7 @@ protected: // Can only create subclasses. bool SkipSpace = true; bool AllowAtInIdentifier; bool IsAtStartOfStatement = true; + bool LexMasmIntegers = false; AsmCommentConsumer *CommentConsumer = nullptr; MCAsmLexer(); @@ -146,6 +147,10 @@ public: void setCommentConsumer(AsmCommentConsumer *CommentConsumer) { this->CommentConsumer = CommentConsumer; } + + /// Set whether to lex masm-style binary and hex literals. They look like + /// 0b1101 and 0ABCh respectively. + void setLexMasmIntegers(bool V) { LexMasmIntegers = V; } }; } // end namespace llvm diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 2920ac66290..62103e3107c 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -156,9 +156,10 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, Parser->setAssemblerDialect(Dialect); Parser->setTargetParser(*TAP.get()); Parser->setEnablePrintSchedInfo(EnablePrintSchedInfo); + // Enable lexing Masm binary and hex integer literals in intel inline + // assembly. if (Dialect == InlineAsm::AD_Intel) - // We need this flag to be able to parse numbers like "0bH" - Parser->setParsingInlineAsm(true); + Parser->getLexer().setLexMasmIntegers(true); if (MF) { const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); TAP->SetFrameRegister(TRI->getFrameRegister(*MF)); diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp index 74835fd70c0..c8d48f033f6 100644 --- a/lib/MC/MCParser/AsmLexer.cpp +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -243,22 +243,26 @@ static void SkipIgnoredIntegerSuffix(const char *&CurPtr) { // Look ahead to search for first non-hex digit, if it's [hH], then we treat the // integer as a hexadecimal, possibly with leading zeroes. -static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) { - const char *FirstHex = nullptr; +static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix, + bool LexHex) { + const char *FirstNonDec = nullptr; const char *LookAhead = CurPtr; while (true) { if (isDigit(*LookAhead)) { ++LookAhead; - } else if (isHexDigit(*LookAhead)) { - if (!FirstHex) - FirstHex = LookAhead; - ++LookAhead; } else { - break; + if (!FirstNonDec) + FirstNonDec = LookAhead; + + // Keep going if we are looking for a 'h' suffix. + if (LexHex && isHexDigit(*LookAhead)) + ++LookAhead; + else + break; } } - bool isHex = *LookAhead == 'h' || *LookAhead == 'H'; - CurPtr = isHex || !FirstHex ? LookAhead : FirstHex; + bool isHex = LexHex && (*LookAhead == 'h' || *LookAhead == 'H'); + CurPtr = isHex || !FirstNonDec ? LookAhead : FirstNonDec; if (isHex) return 16; return DefaultRadix; @@ -281,7 +285,7 @@ static AsmToken intToken(StringRef Ref, APInt &Value) AsmToken AsmLexer::LexDigit() { // MASM-flavor binary integer: [01]+[bB] // MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH] - if (IsParsingMSInlineAsm && isdigit(CurPtr[-1])) { + if (LexMasmIntegers && isdigit(CurPtr[-1])) { const char *FirstNonBinary = (CurPtr[-1] != '0' && CurPtr[-1] != '1') ? CurPtr - 1 : nullptr; const char *OldCurPtr = CurPtr; @@ -320,7 +324,7 @@ AsmToken AsmLexer::LexDigit() { // Decimal integer: [1-9][0-9]* if (CurPtr[-1] != '0' || CurPtr[0] == '.') { - unsigned Radix = doLookAhead(CurPtr, 10); + unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers); bool isHex = Radix == 16; // Check for floating point literals. if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) { @@ -335,8 +339,8 @@ AsmToken AsmLexer::LexDigit() { return ReturnError(TokStart, !isHex ? "invalid decimal number" : "invalid hexdecimal number"); - // Consume the [bB][hH]. - if (Radix == 2 || Radix == 16) + // Consume the [hH]. + if (LexMasmIntegers && Radix == 16) ++CurPtr; // The darwin/x86 (and x86-64) assembler accepts and ignores type @@ -346,7 +350,7 @@ AsmToken AsmLexer::LexDigit() { return intToken(Result, Value); } - if (!IsParsingMSInlineAsm && ((*CurPtr == 'b') || (*CurPtr == 'B'))) { + if (!LexMasmIntegers && ((*CurPtr == 'b') || (*CurPtr == 'B'))) { ++CurPtr; // See if we actually have "0b" as part of something like "jmp 0b\n" if (!isDigit(CurPtr[0])) { @@ -395,7 +399,7 @@ AsmToken AsmLexer::LexDigit() { return ReturnError(TokStart, "invalid hexadecimal number"); // Consume the optional [hH]. - if (!IsParsingMSInlineAsm && (*CurPtr == 'h' || *CurPtr == 'H')) + if (LexMasmIntegers && (*CurPtr == 'h' || *CurPtr == 'H')) ++CurPtr; // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL @@ -407,7 +411,7 @@ AsmToken AsmLexer::LexDigit() { // Either octal or hexadecimal. APInt Value(128, 0, true); - unsigned Radix = doLookAhead(CurPtr, 8); + unsigned Radix = doHexLookAhead(CurPtr, 8, LexMasmIntegers); bool isHex = Radix == 16; StringRef Result(TokStart, CurPtr - TokStart); if (Result.getAsInteger(Radix, Value)) diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 529f16525fe..3f7b507791e 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -229,7 +229,9 @@ public: void setParsingInlineAsm(bool V) override { ParsingInlineAsm = V; - Lexer.setParsingMSInlineAsm(V); + // When parsing MS inline asm, we must lex 0b1101 and 0ABCH as binary and + // hex integer literals. + Lexer.setLexMasmIntegers(V); } bool isParsingInlineAsm() override { return ParsingInlineAsm; } diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index e67daa5d857..4801078925c 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -3283,7 +3283,6 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) { if (IDVal.startswith(".code")) return ParseDirectiveCode(IDVal, DirectiveID.getLoc()); else if (IDVal.startswith(".att_syntax")) { - getParser().setParsingInlineAsm(false); if (getLexer().isNot(AsmToken::EndOfStatement)) { if (Parser.getTok().getString() == "prefix") Parser.Lex(); @@ -3296,7 +3295,6 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) { return false; } else if (IDVal.startswith(".intel_syntax")) { getParser().setAssemblerDialect(1); - getParser().setParsingInlineAsm(true); if (getLexer().isNot(AsmToken::EndOfStatement)) { if (Parser.getTok().getString() == "noprefix") Parser.Lex(); diff --git a/test/MC/AArch64/macro-hex-int.s b/test/MC/AArch64/macro-hex-int.s new file mode 100644 index 00000000000..0d697bce53e --- /dev/null +++ b/test/MC/AArch64/macro-hex-int.s @@ -0,0 +1,8 @@ +// RUN: llvm-mc -triple aarch64-elf -filetype=obj %s -o - | llvm-objdump -d -r - | FileCheck %s + +.macro do_add sz + add v0.\sz, v0.\sz, v0.\sz +.endm + +do_add 8h +// CHECK: add v0.8h, v0.8h, v0.8h diff --git a/test/MC/X86/intel-syntax-hex.s b/test/MC/X86/intel-syntax-hex.s index b3a19fbaa34..cb73ca9f501 100644 --- a/test/MC/X86/intel-syntax-hex.s +++ b/test/MC/X86/intel-syntax-hex.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel %s | FileCheck %s +// RUN: llvm-mc -masm-integers -triple x86_64-unknown-unknown -x86-asm-syntax=intel %s | FileCheck %s // rdar://12470373 // Checks to make sure we parse the hexadecimal suffix properly. diff --git a/test/MC/X86/pr27884.s b/test/MC/X86/pr27884.s index edd4e8d34a9..d78c35c8fc0 100644 --- a/test/MC/X86/pr27884.s +++ b/test/MC/X86/pr27884.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple x86_64-unknown-unknown %s +// RUN: llvm-mc -triple x86_64-unknown-unknown %s -masm-integers=1 .intel_syntax add rbx, 0B0h diff --git a/test/tools/llvm-mca/X86/intel-syntax.s b/test/tools/llvm-mca/X86/intel-syntax.s index 1aaa3902866..786d06ba0d1 100644 --- a/test/tools/llvm-mca/X86/intel-syntax.s +++ b/test/tools/llvm-mca/X86/intel-syntax.s @@ -5,7 +5,7 @@ .intel_syntax noprefix mov eax, 1 - mov ebx, 0ffh + mov ebx, 0xff imul esi, edi lea eax, [rsi + rdi] diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp index 0263c866f77..c0976502f54 100644 --- a/tools/llvm-mc/llvm-mc.cpp +++ b/tools/llvm-mc/llvm-mc.cpp @@ -164,6 +164,10 @@ MainFileName("main-file-name", static cl::opt SaveTempLabels("save-temp-labels", cl::desc("Don't discard temporary labels")); +static cl::opt LexMasmIntegers( + "masm-integers", + cl::desc("Enable binary and hex masm integers (0b110 and 0ABCh)")); + static cl::opt NoExecStack("no-exec-stack", cl::desc("File doesn't need an exec stack")); @@ -293,6 +297,7 @@ static int AssembleInput(const char *ProgName, const Target *TheTarget, return SymbolResult; Parser->setShowParsedOperands(ShowInstOperands); Parser->setTargetParser(*TAP); + Parser->getLexer().setLexMasmIntegers(LexMasmIntegers); int Res = Parser->Run(NoInitialTextSection);