1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 10:42:39 +01:00

[AsmParser][SystemZ][z/OS] Add in support to accept "#" as part of an Identifier token

- This patch adds support for accepting the "#" character as part of an Identifier.
- This support is especially needed for the HLASM dialect, since "#" is treated as part of the valid "Alphabet" range.
- This is done by following the precedent set by the `AllowAtInIdentifier` field in `MCAsmLexer.h`: a new field called `AllowHashInIdentifier` is introduced.
- The static function `IsIdentifierChar` is also updated to accept the `#` character if the `AllowHashInIdentifier` field is set to true.
Note: The field introduced in `MCAsmLexer.h` could very well be moved to `MCAsmInfo.h`. I'm not opposed to it. I decided to put it in `MCAsmLexer` since there seems to be some sort of precedent already with `AllowAtInIdentifier`.

Reviewed By: abhina.sreeskantharajan, nickdesaulniers, MaskRay

Differential Revision: https://reviews.llvm.org/D99277
This commit is contained in:
Anirudh Prasad 2021-04-01 10:38:42 -04:00
parent 70e309f48b
commit 388899404f
3 changed files with 76 additions and 10 deletions

View File

@ -48,6 +48,7 @@ protected: // Can only create subclasses.
const char *TokStart = nullptr; const char *TokStart = nullptr;
bool SkipSpace = true; bool SkipSpace = true;
bool AllowAtInIdentifier; bool AllowAtInIdentifier;
bool AllowHashInIdentifier = false;
bool IsAtStartOfStatement = true; bool IsAtStartOfStatement = true;
bool LexMasmHexFloats = false; bool LexMasmHexFloats = false;
bool LexMasmIntegers = false; bool LexMasmIntegers = false;
@ -147,6 +148,8 @@ public:
bool getAllowAtInIdentifier() { return AllowAtInIdentifier; } bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; } void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
void setAllowHashInIdentifier(bool V) { AllowHashInIdentifier = V; }
void setCommentConsumer(AsmCommentConsumer *CommentConsumer) { void setCommentConsumer(AsmCommentConsumer *CommentConsumer) {
this->CommentConsumer = CommentConsumer; this->CommentConsumer = CommentConsumer;
} }

View File

@ -143,10 +143,10 @@ AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) {
return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart)); return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
} }
/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]* /// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@#?]*
static bool IsIdentifierChar(char c, bool AllowAt) { static bool isIdentifierChar(char C, bool AllowAt, bool AllowHash) {
return isAlnum(c) || c == '_' || c == '$' || c == '.' || return isAlnum(C) || C == '_' || C == '$' || C == '.' || C == '?' ||
(c == '@' && AllowAt) || c == '?'; (AllowAt && C == '@') || (AllowHash && C == '#');
} }
AsmToken AsmLexer::LexIdentifier() { AsmToken AsmLexer::LexIdentifier() {
@ -156,12 +156,13 @@ AsmToken AsmLexer::LexIdentifier() {
while (isDigit(*CurPtr)) while (isDigit(*CurPtr))
++CurPtr; ++CurPtr;
if (!IsIdentifierChar(*CurPtr, AllowAtInIdentifier) || if (!isIdentifierChar(*CurPtr, AllowAtInIdentifier,
AllowHashInIdentifier) ||
*CurPtr == 'e' || *CurPtr == 'E') *CurPtr == 'e' || *CurPtr == 'E')
return LexFloatLiteral(); return LexFloatLiteral();
} }
while (IsIdentifierChar(*CurPtr, AllowAtInIdentifier)) while (isIdentifierChar(*CurPtr, AllowAtInIdentifier, AllowHashInIdentifier))
++CurPtr; ++CurPtr;
// Handle . as a special case. // Handle . as a special case.
@ -726,9 +727,10 @@ AsmToken AsmLexer::LexToken() {
switch (CurChar) { switch (CurChar) {
default: default:
if (MAI.doesAllowSymbolAtNameStart()) { if (MAI.doesAllowSymbolAtNameStart()) {
// Handle Microsoft-style identifier: [a-zA-Z_$.@?][a-zA-Z0-9_$.@?]* // Handle Microsoft-style identifier: [a-zA-Z_$.@?][a-zA-Z0-9_$.@#?]*
if (!isDigit(CurChar) && if (!isDigit(CurChar) &&
IsIdentifierChar(CurChar, MAI.doesAllowAtInName())) isIdentifierChar(CurChar, MAI.doesAllowAtInName(),
AllowHashInIdentifier))
return LexIdentifier(); return LexIdentifier();
} else { } else {
// Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*

View File

@ -94,8 +94,6 @@ protected:
Str.reset(TheTarget->createNullStreamer(*Ctx)); Str.reset(TheTarget->createNullStreamer(*Ctx));
Parser.reset(createMCAsmParser(SrcMgr, *Ctx, *Str, *MUPMAI)); Parser.reset(createMCAsmParser(SrcMgr, *Ctx, *Str, *MUPMAI));
// Lex initially to get the string.
Parser->getLexer().Lex();
} }
void lexAndCheckTokens(StringRef AsmStr, void lexAndCheckTokens(StringRef AsmStr,
@ -116,6 +114,9 @@ TEST_F(SystemZAsmLexerTest, CheckDontRestrictCommentStringToStartOfStatement) {
// Setup. // Setup.
setupCallToAsmParser(AsmStr); setupCallToAsmParser(AsmStr);
// Lex initially to get the string.
Parser->getLexer().Lex();
SmallVector<AsmToken::TokenKind> ExpectedTokens( SmallVector<AsmToken::TokenKind> ExpectedTokens(
{AsmToken::Identifier, AsmToken::EndOfStatement}); {AsmToken::Identifier, AsmToken::EndOfStatement});
lexAndCheckTokens(AsmStr /* "jne #-4" */, ExpectedTokens); lexAndCheckTokens(AsmStr /* "jne #-4" */, ExpectedTokens);
@ -129,6 +130,9 @@ TEST_F(SystemZAsmLexerTest, CheckRestrictCommentStringToStartOfStatement) {
MUPMAI->setRestrictCommentStringToStartOfStatement(true); MUPMAI->setRestrictCommentStringToStartOfStatement(true);
setupCallToAsmParser(AsmStr); setupCallToAsmParser(AsmStr);
// Lex initially to get the string.
Parser->getLexer().Lex();
// When we are restricting the comment string to only the start of the // When we are restricting the comment string to only the start of the
// statement, The sequence of tokens we are expecting are: Identifier - "jne" // statement, The sequence of tokens we are expecting are: Identifier - "jne"
// Hash - '#' // Hash - '#'
@ -148,8 +152,65 @@ TEST_F(SystemZAsmLexerTest, CheckHLASMComment) {
MUPMAI->setCommentString("*"); MUPMAI->setCommentString("*");
setupCallToAsmParser(AsmStr); setupCallToAsmParser(AsmStr);
// Lex initially to get the string.
Parser->getLexer().Lex();
SmallVector<AsmToken::TokenKind> ExpectedTokens( SmallVector<AsmToken::TokenKind> ExpectedTokens(
{AsmToken::EndOfStatement, AsmToken::Eof}); {AsmToken::EndOfStatement, AsmToken::Eof});
lexAndCheckTokens(AsmStr /* "* lhi 1,10" */, ExpectedTokens); lexAndCheckTokens(AsmStr /* "* lhi 1,10" */, ExpectedTokens);
} }
// Without enabling AllowHashInIdentifier on the lexer, '#' is not folded into
// the identifier that precedes it.
TEST_F(SystemZAsmLexerTest, CheckHashDefault) {
  StringRef Source = "lh#123";

  // Create the parser over the input, then lex once up front, as the other
  // tests in this fixture do before checking tokens.
  setupCallToAsmParser(Source);
  Parser->getLexer().Lex();

  // Expected split of the input:
  //   "lh"   -> Identifier
  //   "#123" -> EndOfStatement (Lexed as a comment since CommentString is "#")
  SmallVector<AsmToken::TokenKind> Expected{
      AsmToken::Identifier, AsmToken::EndOfStatement, AsmToken::Eof};
  lexAndCheckTokens(Source, Expected);
}
// Test that "#" is accepted as part of an Identifier token once
// AllowHashInIdentifier has been enabled on the lexer.
TEST_F(SystemZAsmLexerTest, CheckAllowHashInIdentifier) {
  StringRef AsmStr = "lh#123";

  // Setup.
  setupCallToAsmParser(AsmStr);
  // Enable '#' in identifiers before the first Lex() call below.
  Parser->getLexer().setAllowHashInIdentifier(true);

  // Lex initially to get the string.
  Parser->getLexer().Lex();

  // "lh#123" -> Identifier (the '#' is kept as part of the identifier)
  SmallVector<AsmToken::TokenKind> ExpectedTokens(
      {AsmToken::Identifier, AsmToken::EndOfStatement, AsmToken::Eof});
  lexAndCheckTokens(AsmStr, ExpectedTokens);
}
// With '#' allowed in identifiers and "*" configured as a comment string that
// is restricted to the start of a statement, a mid-statement '*' is expected
// to lex as a Star token rather than start a comment.
TEST_F(SystemZAsmLexerTest, CheckAllowHashInIdentifier2) {
  StringRef Source = "lh#12*3";

  // Configure the asm info first, create the parser, then enable '#' in
  // identifiers on the lexer.
  MUPMAI->setCommentString("*");
  MUPMAI->setRestrictCommentStringToStartOfStatement(true);
  setupCallToAsmParser(Source);
  Parser->getLexer().setAllowHashInIdentifier(true);

  // Lex once up front, as the other tests in this fixture do before checking
  // tokens.
  Parser->getLexer().Lex();

  // Expected split of the input:
  //   "lh#12" -> Identifier
  //   "*"     -> Star
  //   "3"     -> Integer
  SmallVector<AsmToken::TokenKind> Expected{
      AsmToken::Identifier, AsmToken::Star, AsmToken::Integer,
      AsmToken::EndOfStatement, AsmToken::Eof};
  lexAndCheckTokens(Source, Expected);
}
} // end anonymous namespace } // end anonymous namespace