From d7da6c0d4366991a4336fc1d431f2c5959f5a295 Mon Sep 17 00:00:00 2001 From: Anirudh Prasad Date: Tue, 13 Apr 2021 11:07:46 -0400 Subject: [PATCH] [AsmParser][SystemZ][z/OS] Add in support to allow use of additional comment strings. - Currently, MCAsmInfo provides a CommentString attribute that various targets can set, so that the AsmLexer can appropriately lex a string as a comment based on the set value of the attribute. - However, AsmLexer also supports a few additional comment syntaxes, in addition to what's specified as a CommentString attribute. This includes regular C-style block comments (/* ... */), regular C-style line comments (// .... ) and "#". While I'm not sure as to why this behaviour exists, I am assuming it does so to maintain backward compatibility with GNU AS (see https://sourceware.org/binutils/docs/as/Comments.html#Comments for reference). For example: Consider a target which sets the CommentString attribute to '*'. The following strings are all lexed as comments. ``` "# abc" -> comment "// abc" -> comment "/* abc */" -> comment "* abc" -> comment ``` - In HLASM however, only "*" is accepted as a comment string, and nothing else. - To achieve this, an additional attribute (`AllowAdditionalComments`) has been added to MCAsmInfo. If this attribute is set to false, then only the string specified by the CommentString attribute is used as a possible comment string to be lexed by the AsmLexer. The regular C-style block comments, line comments and "#" are disabled. As a final note, "#" will still be treated as a comment if the CommentString attribute is set to "#". 
Depends on https://reviews.llvm.org/D99277 Reviewed By: abhina.sreeskantharajan, myiwanch Differential Revision: https://reviews.llvm.org/D99286 --- include/llvm/MC/MCAsmInfo.h | 9 + lib/MC/MCParser/AsmLexer.cpp | 10 +- .../SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp | 1 + unittests/MC/SystemZ/SystemZAsmLexerTest.cpp | 154 ++++++++++++++++++ 4 files changed, 173 insertions(+), 1 deletion(-) diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h index 216e01985cc..656cb29e213 100644 --- a/include/llvm/MC/MCAsmInfo.h +++ b/include/llvm/MC/MCAsmInfo.h @@ -130,6 +130,14 @@ protected: /// at the beginning of statements. Defaults to false. bool RestrictCommentStringToStartOfStatement = false; + /// This indicates whether to allow additional "comment strings" to be lexed + /// as a comment. Setting this attribute to true, will ensure that C-style + /// line comments (// ..), C-style block comments (/* .. */), and "#" are + /// all treated as comments in addition to the string specified by the + /// CommentString attribute. + /// Default is true. + bool AllowAdditionalComments = true; + /// This is appended to emitted labels. Defaults to ":" const char *LabelSuffix; @@ -567,6 +575,7 @@ public: bool getRestrictCommentStringToStartOfStatement() const { return RestrictCommentStringToStartOfStatement; } + bool shouldAllowAdditionalComments() const { return AllowAdditionalComments; } const char *getLabelSuffix() const { return LabelSuffix; } bool useAssignmentForEHBegin() const { return UseAssignmentForEHBegin; } diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp index f8e8eea9c2a..ab105c61005 100644 --- a/lib/MC/MCParser/AsmLexer.cpp +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -175,7 +175,13 @@ AsmToken AsmLexer::LexIdentifier() { /// LexSlash: Slash: / /// C-Style Comment: /* ... */ +/// C-style Comment: // ... 
AsmToken AsmLexer::LexSlash() { + if (!MAI.shouldAllowAdditionalComments()) { + IsAtStartOfStatement = false; + return AsmToken(AsmToken::Slash, StringRef(TokStart, 1)); + } + switch (*CurPtr) { case '*': IsAtStartOfStatement = false; @@ -729,7 +735,9 @@ AsmToken AsmLexer::LexToken() { UnLex(TokenBuf[0]); return AsmToken(AsmToken::HashDirective, s); } - return LexLineComment(); + + if (MAI.shouldAllowAdditionalComments()) + return LexLineComment(); } if (isAtStartOfComment(TokStart)) diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp index 8c4567cd1c4..0c7a1338aaa 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp @@ -23,6 +23,7 @@ SystemZMCAsmInfo::SystemZMCAsmInfo(const Triple &TT) { CommentString = AssemblerDialect == AD_HLASM ? "*" : "#"; RestrictCommentStringToStartOfStatement = (AssemblerDialect == AD_HLASM); + AllowAdditionalComments = (AssemblerDialect == AD_ATT); ZeroDirective = "\t.space\t"; Data64bitsDirective = "\t.quad\t"; UsesELFSectionDirectiveForBSS = true; diff --git a/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp b/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp index 8eea737886d..a1253eaff43 100644 --- a/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp +++ b/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp @@ -32,6 +32,9 @@ public: RestrictCommentStringToStartOfStatement = Value; } void setCommentString(StringRef Value) { CommentString = Value; } + void setAllowAdditionalComments(bool Value) { + AllowAdditionalComments = Value; + } }; // Setup a testing class that the GTest framework can call. @@ -213,4 +216,155 @@ TEST_F(SystemZAsmLexerTest, CheckAllowHashInIdentifier2) { AsmToken::EndOfStatement, AsmToken::Eof}); lexAndCheckTokens(AsmStr, ExpectedTokens); } + +TEST_F(SystemZAsmLexerTest, DontCheckStrictCommentString) { + StringRef AsmStr = "# abc\n/* def */// xyz"; + + // Setup. 
+ setupCallToAsmParser(AsmStr); + + // Lex initially to get the string. + Parser->getLexer().Lex(); + + SmallVector ExpectedTokens( + {AsmToken::EndOfStatement, AsmToken::Comment, AsmToken::EndOfStatement, + AsmToken::Eof}); + lexAndCheckTokens(AsmStr, ExpectedTokens); +} + +TEST_F(SystemZAsmLexerTest, DontCheckStrictCommentString2) { + StringRef AsmStr = "# abc\n/* def */// xyz\n* rst"; + + // Setup. + MUPMAI->setCommentString("*"); + setupCallToAsmParser(AsmStr); + + // Lex initially to get the string. + Parser->getLexer().Lex(); + + SmallVector ExpectedTokens( + {AsmToken::EndOfStatement, AsmToken::Comment, AsmToken::EndOfStatement, + AsmToken::EndOfStatement, AsmToken::Eof}); + lexAndCheckTokens(AsmStr, ExpectedTokens); +} + +TEST_F(SystemZAsmLexerTest, CheckStrictCommentString) { + StringRef AsmStr = "# abc\n/* def */// xyz"; + + // Setup. + MUPMAI->setAllowAdditionalComments(false); + setupCallToAsmParser(AsmStr); + + // Lex initially to get the string. + Parser->getLexer().Lex(); + + // "# abc" -> still treated as a comment, since CommentString + // is set to "#" + SmallVector ExpectedTokens; + ExpectedTokens.push_back(AsmToken::EndOfStatement); // "# abc\n" + ExpectedTokens.push_back(AsmToken::Slash); // "/" + ExpectedTokens.push_back(AsmToken::Star); // "*" + ExpectedTokens.push_back(AsmToken::Identifier); // "def" + ExpectedTokens.push_back(AsmToken::Star); // "*" + ExpectedTokens.push_back(AsmToken::Slash); // "/" + ExpectedTokens.push_back(AsmToken::Slash); // "/" + ExpectedTokens.push_back(AsmToken::Slash); // "/" + ExpectedTokens.push_back(AsmToken::Identifier); // "xyz" + ExpectedTokens.push_back(AsmToken::EndOfStatement); + ExpectedTokens.push_back(AsmToken::Eof); + + lexAndCheckTokens(AsmStr, ExpectedTokens); +} + +TEST_F(SystemZAsmLexerTest, CheckStrictCommentString2) { + StringRef AsmStr = "// abc"; + + // Setup. 
+ MUPMAI->setAllowAdditionalComments(false); + MUPMAI->setCommentString("//"); + setupCallToAsmParser(AsmStr); + + // Lex initially to get the string. + Parser->getLexer().Lex(); + + // "// abc" -> will still be treated as a comment because "//" is the + // CommentString + SmallVector ExpectedTokens( + {AsmToken::EndOfStatement, AsmToken::Eof}); + lexAndCheckTokens(AsmStr /* "// abc" */, ExpectedTokens); +} + +TEST_F(SystemZAsmLexerTest, CheckStrictCommentString3) { + StringRef AsmStr = "/* abc */"; + + // Setup. + MUPMAI->setAllowAdditionalComments(false); + setupCallToAsmParser(AsmStr); + + // Lex initially to get the string. + Parser->getLexer().Lex(); + + SmallVector ExpectedTokens; + ExpectedTokens.push_back(AsmToken::Slash); + ExpectedTokens.push_back(AsmToken::Star); + ExpectedTokens.push_back(AsmToken::Identifier); + ExpectedTokens.push_back(AsmToken::Star); + ExpectedTokens.push_back(AsmToken::Slash); + ExpectedTokens.push_back(AsmToken::EndOfStatement); + ExpectedTokens.push_back(AsmToken::Eof); + + lexAndCheckTokens(AsmStr, ExpectedTokens); +} + +TEST_F(SystemZAsmLexerTest, CheckStrictCommentString4) { + StringRef AsmStr = "# abc\n/* def */// xyz"; + + // Setup. + MUPMAI->setCommentString("*"); + MUPMAI->setAllowAdditionalComments(false); + MUPMAI->setRestrictCommentStringToStartOfStatement(true); + setupCallToAsmParser(AsmStr); + + // Lex initially to get the string. 
+ Parser->getLexer().Lex(); + + SmallVector ExpectedTokens; + ExpectedTokens.push_back(AsmToken::Hash); // "#" + ExpectedTokens.push_back(AsmToken::Identifier); // "abc" + ExpectedTokens.push_back(AsmToken::EndOfStatement); // "\n" + ExpectedTokens.push_back(AsmToken::Slash); // "/" + ExpectedTokens.push_back(AsmToken::Star); // "*" + ExpectedTokens.push_back(AsmToken::Identifier); // "def" + ExpectedTokens.push_back(AsmToken::Star); // "*" + ExpectedTokens.push_back(AsmToken::Slash); // "/" + ExpectedTokens.push_back(AsmToken::Slash); // "/" + ExpectedTokens.push_back(AsmToken::Slash); // "/" + ExpectedTokens.push_back(AsmToken::Identifier); // "xyz" + ExpectedTokens.push_back(AsmToken::EndOfStatement); + ExpectedTokens.push_back(AsmToken::Eof); + + lexAndCheckTokens(AsmStr, ExpectedTokens); +} + +TEST_F(SystemZAsmLexerTest, CheckStrictCommentString5) { + StringRef AsmStr = "#abc\n/* def */// xyz"; + + // Setup. + MUPMAI->setCommentString("*"); + MUPMAI->setAllowAdditionalComments(false); + setupCallToAsmParser(AsmStr); + + // Lex initially to get the string. + Parser->getLexer().Lex(); + + SmallVector ExpectedTokens; + ExpectedTokens.push_back(AsmToken::Hash); // "#" + ExpectedTokens.push_back(AsmToken::Identifier); // "abc" + ExpectedTokens.push_back(AsmToken::EndOfStatement); // "\n" + ExpectedTokens.push_back(AsmToken::Slash); // "/" + ExpectedTokens.push_back(AsmToken::EndOfStatement); // "* def */// xyz" + ExpectedTokens.push_back(AsmToken::Eof); + + lexAndCheckTokens(AsmStr, ExpectedTokens); +} } // end anonymous namespace