mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 02:33:06 +01:00
[AsmParser][SystemZ][z/OS] Add in support to accept "#" as part of an Identifier token
- This patch adds support for accepting the "#" character as part of an Identifier. - This support is needed especially for the HLASM dialect, since "#" is treated as part of the valid "Alphabet" range. - This is done by following the precedent set by the `AllowAtInIdentifier` field in `MCAsmLexer.h`: a new field called `AllowHashInIdentifier` is introduced. - The static function `IsIdentifierChar` is also updated to accept the `#` character if the `AllowHashInIdentifier` field is set to true. Note: The field introduced in `MCAsmLexer.h` could very well be moved to `MCAsmInfo.h`; I'm not opposed to that. I decided to put it in `MCAsmLexer` since there is already some precedent for it with `AllowAtInIdentifier`. Reviewed By: abhina.sreeskantharajan, nickdesaulniers, MaskRay. Differential Revision: https://reviews.llvm.org/D99277
This commit is contained in:
parent
70e309f48b
commit
388899404f
@ -48,6 +48,7 @@ protected: // Can only create subclasses.
|
||||
const char *TokStart = nullptr;
|
||||
bool SkipSpace = true;
|
||||
bool AllowAtInIdentifier;
|
||||
bool AllowHashInIdentifier = false;
|
||||
bool IsAtStartOfStatement = true;
|
||||
bool LexMasmHexFloats = false;
|
||||
bool LexMasmIntegers = false;
|
||||
@ -147,6 +148,8 @@ public:
|
||||
/// Returns the current value of the AllowAtInIdentifier flag.
bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
|
||||
/// Sets the AllowAtInIdentifier flag to \p v.
void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
|
||||
|
||||
/// Sets the AllowHashInIdentifier flag to \p V. The flag is initialized to
/// false, so '#' is only accepted inside identifiers after a caller opts in.
void setAllowHashInIdentifier(bool V) { AllowHashInIdentifier = V; }
|
||||
|
||||
/// Stores \p CommentConsumer in this lexer's CommentConsumer member.
void setCommentConsumer(AsmCommentConsumer *CommentConsumer) {
|
||||
// The parameter shadows the member of the same name, hence the explicit this->.
this->CommentConsumer = CommentConsumer;
|
||||
}
|
||||
|
@ -143,10 +143,10 @@ AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) {
|
||||
return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
|
||||
}
|
||||
|
||||
/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]*
|
||||
static bool IsIdentifierChar(char c, bool AllowAt) {
|
||||
return isAlnum(c) || c == '_' || c == '$' || c == '.' ||
|
||||
(c == '@' && AllowAt) || c == '?';
|
||||
/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@#?]*
///
/// Returns true if \p C may appear in an identifier. Characters accepted by
/// isAlnum, plus '_', '$', '.' and '?', are always allowed; '@' is allowed
/// only when \p AllowAt is true, and '#' only when \p AllowHash is true.
|
||||
static bool isIdentifierChar(char C, bool AllowAt, bool AllowHash) {
|
||||
return isAlnum(C) || C == '_' || C == '$' || C == '.' || C == '?' ||
|
||||
(AllowAt && C == '@') || (AllowHash && C == '#');
|
||||
}
|
||||
|
||||
AsmToken AsmLexer::LexIdentifier() {
|
||||
@ -156,12 +156,13 @@ AsmToken AsmLexer::LexIdentifier() {
|
||||
while (isDigit(*CurPtr))
|
||||
++CurPtr;
|
||||
|
||||
if (!IsIdentifierChar(*CurPtr, AllowAtInIdentifier) ||
|
||||
if (!isIdentifierChar(*CurPtr, AllowAtInIdentifier,
|
||||
AllowHashInIdentifier) ||
|
||||
*CurPtr == 'e' || *CurPtr == 'E')
|
||||
return LexFloatLiteral();
|
||||
}
|
||||
|
||||
while (IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
|
||||
while (isIdentifierChar(*CurPtr, AllowAtInIdentifier, AllowHashInIdentifier))
|
||||
++CurPtr;
|
||||
|
||||
// Handle . as a special case.
|
||||
@ -726,9 +727,10 @@ AsmToken AsmLexer::LexToken() {
|
||||
switch (CurChar) {
|
||||
default:
|
||||
if (MAI.doesAllowSymbolAtNameStart()) {
|
||||
// Handle Microsoft-style identifier: [a-zA-Z_$.@?][a-zA-Z0-9_$.@?]*
|
||||
// Handle Microsoft-style identifier: [a-zA-Z_$.@?][a-zA-Z0-9_$.@#?]*
|
||||
if (!isDigit(CurChar) &&
|
||||
IsIdentifierChar(CurChar, MAI.doesAllowAtInName()))
|
||||
isIdentifierChar(CurChar, MAI.doesAllowAtInName(),
|
||||
AllowHashInIdentifier))
|
||||
return LexIdentifier();
|
||||
} else {
|
||||
// Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
|
||||
|
@ -94,8 +94,6 @@ protected:
|
||||
Str.reset(TheTarget->createNullStreamer(*Ctx));
|
||||
|
||||
Parser.reset(createMCAsmParser(SrcMgr, *Ctx, *Str, *MUPMAI));
|
||||
// Lex initially to get the string.
|
||||
Parser->getLexer().Lex();
|
||||
}
|
||||
|
||||
void lexAndCheckTokens(StringRef AsmStr,
|
||||
@ -116,6 +114,9 @@ TEST_F(SystemZAsmLexerTest, CheckDontRestrictCommentStringToStartOfStatement) {
|
||||
// Setup.
|
||||
setupCallToAsmParser(AsmStr);
|
||||
|
||||
// Lex initially to get the string.
|
||||
Parser->getLexer().Lex();
|
||||
|
||||
SmallVector<AsmToken::TokenKind> ExpectedTokens(
|
||||
{AsmToken::Identifier, AsmToken::EndOfStatement});
|
||||
lexAndCheckTokens(AsmStr /* "jne #-4" */, ExpectedTokens);
|
||||
@ -129,6 +130,9 @@ TEST_F(SystemZAsmLexerTest, CheckRestrictCommentStringToStartOfStatement) {
|
||||
MUPMAI->setRestrictCommentStringToStartOfStatement(true);
|
||||
setupCallToAsmParser(AsmStr);
|
||||
|
||||
// Lex initially to get the string.
|
||||
Parser->getLexer().Lex();
|
||||
|
||||
// When we are restricting the comment string to only the start of the
|
||||
// statement, The sequence of tokens we are expecting are: Identifier - "jne"
|
||||
// Hash - '#'
|
||||
@ -148,8 +152,65 @@ TEST_F(SystemZAsmLexerTest, CheckHLASMComment) {
|
||||
MUPMAI->setCommentString("*");
|
||||
setupCallToAsmParser(AsmStr);
|
||||
|
||||
// Lex initially to get the string.
|
||||
Parser->getLexer().Lex();
|
||||
|
||||
SmallVector<AsmToken::TokenKind> ExpectedTokens(
|
||||
{AsmToken::EndOfStatement, AsmToken::Eof});
|
||||
lexAndCheckTokens(AsmStr /* "* lhi 1,10" */, ExpectedTokens);
|
||||
}
|
||||
|
||||
// Test that "#" is not lexed as part of an Identifier when
// setAllowHashInIdentifier is never called on the lexer.
TEST_F(SystemZAsmLexerTest, CheckHashDefault) {
|
||||
StringRef AsmStr = "lh#123";
|
||||
|
||||
// Setup.
|
||||
setupCallToAsmParser(AsmStr);
|
||||
|
||||
// Lex initially to get the string.
|
||||
Parser->getLexer().Lex();
|
||||
|
||||
// "lh" -> Identifier
|
||||
// "#123" -> EndOfStatement (lexed as a comment since CommentString is "#")
|
||||
SmallVector<AsmToken::TokenKind> ExpectedTokens(
|
||||
{AsmToken::Identifier, AsmToken::EndOfStatement, AsmToken::Eof});
|
||||
lexAndCheckTokens(AsmStr, ExpectedTokens);
|
||||
}
|
||||
|
||||
// Test that "#" is accepted as part of an Identifier once
// setAllowHashInIdentifier(true) has been called on the lexer.
|
||||
TEST_F(SystemZAsmLexerTest, CheckAllowHashInIdentifier) {
|
||||
StringRef AsmStr = "lh#123";
|
||||
|
||||
// Setup.
|
||||
setupCallToAsmParser(AsmStr);
|
||||
Parser->getLexer().setAllowHashInIdentifier(true);
|
||||
|
||||
// Lex initially to get the string.
|
||||
Parser->getLexer().Lex();
|
||||
|
||||
// "lh#123" -> Identifier
|
||||
SmallVector<AsmToken::TokenKind> ExpectedTokens(
|
||||
{AsmToken::Identifier, AsmToken::EndOfStatement, AsmToken::Eof});
|
||||
lexAndCheckTokens(AsmStr, ExpectedTokens);
|
||||
}
|
||||
|
||||
// Test that "#" is accepted as part of an Identifier while "*", configured as
// the comment string but restricted to the start of a statement, is still
// expected to lex as Star in the middle of the statement.
TEST_F(SystemZAsmLexerTest, CheckAllowHashInIdentifier2) {
|
||||
StringRef AsmStr = "lh#12*3";
|
||||
|
||||
// Setup.
|
||||
MUPMAI->setCommentString("*");
|
||||
MUPMAI->setRestrictCommentStringToStartOfStatement(true);
|
||||
setupCallToAsmParser(AsmStr);
|
||||
Parser->getLexer().setAllowHashInIdentifier(true);
|
||||
|
||||
// Lex initially to get the string.
|
||||
Parser->getLexer().Lex();
|
||||
|
||||
// "lh#12" -> Identifier
|
||||
// "*" -> Star
|
||||
// "3" -> Integer
|
||||
SmallVector<AsmToken::TokenKind> ExpectedTokens(
|
||||
{AsmToken::Identifier, AsmToken::Star, AsmToken::Integer,
|
||||
AsmToken::EndOfStatement, AsmToken::Eof});
|
||||
lexAndCheckTokens(AsmStr, ExpectedTokens);
|
||||
}
|
||||
} // end anonymous namespace
|
||||
|
Loading…
Reference in New Issue
Block a user