mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
[AsmParser][SystemZ][z/OS] Add in support to accept "#" as part of an Identifier token
- This patch adds in support to accept the "#" character as part of an Identifier. - This support is needed especially for the HLASM dialect since "#" is treated as part of the valid "Alphabet" range. - This is done by following the precedent set by the `AllowAtInIdentifier` field in `MCAsmLexer.h`. A new field called `AllowHashInIdentifier` is introduced. - The static function `IsIdentifierChar` is also updated to accept the `#` character if the `AllowHashInIdentifier` field is set to true. Note: The field introduced in `MCAsmLexer.h` could very well be moved to `MCAsmInfo.h`. I'm not opposed to it. I decided to put it in `MCAsmLexer` since there seems to be some sort of precedent already with `AllowAtInIdentifier`. Reviewed By: abhina.sreeskantharajan, nickdesaulniers, MaskRay Differential Revision: https://reviews.llvm.org/D99277
This commit is contained in:
parent
70e309f48b
commit
388899404f
@ -48,6 +48,7 @@ protected: // Can only create subclasses.
|
|||||||
const char *TokStart = nullptr;
|
const char *TokStart = nullptr;
|
||||||
bool SkipSpace = true;
|
bool SkipSpace = true;
|
||||||
bool AllowAtInIdentifier;
|
bool AllowAtInIdentifier;
|
||||||
|
bool AllowHashInIdentifier = false;
|
||||||
bool IsAtStartOfStatement = true;
|
bool IsAtStartOfStatement = true;
|
||||||
bool LexMasmHexFloats = false;
|
bool LexMasmHexFloats = false;
|
||||||
bool LexMasmIntegers = false;
|
bool LexMasmIntegers = false;
|
||||||
@ -147,6 +148,8 @@ public:
|
|||||||
bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
|
bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
|
||||||
void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
|
void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
|
||||||
|
|
||||||
|
void setAllowHashInIdentifier(bool V) { AllowHashInIdentifier = V; }
|
||||||
|
|
||||||
void setCommentConsumer(AsmCommentConsumer *CommentConsumer) {
|
void setCommentConsumer(AsmCommentConsumer *CommentConsumer) {
|
||||||
this->CommentConsumer = CommentConsumer;
|
this->CommentConsumer = CommentConsumer;
|
||||||
}
|
}
|
||||||
|
@ -143,10 +143,10 @@ AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) {
|
|||||||
return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
|
return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
|
||||||
}
|
}
|
||||||
|
|
||||||
/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]*
|
/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@#?]*
|
||||||
static bool IsIdentifierChar(char c, bool AllowAt) {
|
static bool isIdentifierChar(char C, bool AllowAt, bool AllowHash) {
|
||||||
return isAlnum(c) || c == '_' || c == '$' || c == '.' ||
|
return isAlnum(C) || C == '_' || C == '$' || C == '.' || C == '?' ||
|
||||||
(c == '@' && AllowAt) || c == '?';
|
(AllowAt && C == '@') || (AllowHash && C == '#');
|
||||||
}
|
}
|
||||||
|
|
||||||
AsmToken AsmLexer::LexIdentifier() {
|
AsmToken AsmLexer::LexIdentifier() {
|
||||||
@ -156,12 +156,13 @@ AsmToken AsmLexer::LexIdentifier() {
|
|||||||
while (isDigit(*CurPtr))
|
while (isDigit(*CurPtr))
|
||||||
++CurPtr;
|
++CurPtr;
|
||||||
|
|
||||||
if (!IsIdentifierChar(*CurPtr, AllowAtInIdentifier) ||
|
if (!isIdentifierChar(*CurPtr, AllowAtInIdentifier,
|
||||||
|
AllowHashInIdentifier) ||
|
||||||
*CurPtr == 'e' || *CurPtr == 'E')
|
*CurPtr == 'e' || *CurPtr == 'E')
|
||||||
return LexFloatLiteral();
|
return LexFloatLiteral();
|
||||||
}
|
}
|
||||||
|
|
||||||
while (IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
|
while (isIdentifierChar(*CurPtr, AllowAtInIdentifier, AllowHashInIdentifier))
|
||||||
++CurPtr;
|
++CurPtr;
|
||||||
|
|
||||||
// Handle . as a special case.
|
// Handle . as a special case.
|
||||||
@ -726,9 +727,10 @@ AsmToken AsmLexer::LexToken() {
|
|||||||
switch (CurChar) {
|
switch (CurChar) {
|
||||||
default:
|
default:
|
||||||
if (MAI.doesAllowSymbolAtNameStart()) {
|
if (MAI.doesAllowSymbolAtNameStart()) {
|
||||||
// Handle Microsoft-style identifier: [a-zA-Z_$.@?][a-zA-Z0-9_$.@?]*
|
// Handle Microsoft-style identifier: [a-zA-Z_$.@?][a-zA-Z0-9_$.@#?]*
|
||||||
if (!isDigit(CurChar) &&
|
if (!isDigit(CurChar) &&
|
||||||
IsIdentifierChar(CurChar, MAI.doesAllowAtInName()))
|
isIdentifierChar(CurChar, MAI.doesAllowAtInName(),
|
||||||
|
AllowHashInIdentifier))
|
||||||
return LexIdentifier();
|
return LexIdentifier();
|
||||||
} else {
|
} else {
|
||||||
// Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
|
// Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
|
||||||
|
@ -94,8 +94,6 @@ protected:
|
|||||||
Str.reset(TheTarget->createNullStreamer(*Ctx));
|
Str.reset(TheTarget->createNullStreamer(*Ctx));
|
||||||
|
|
||||||
Parser.reset(createMCAsmParser(SrcMgr, *Ctx, *Str, *MUPMAI));
|
Parser.reset(createMCAsmParser(SrcMgr, *Ctx, *Str, *MUPMAI));
|
||||||
// Lex initially to get the string.
|
|
||||||
Parser->getLexer().Lex();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void lexAndCheckTokens(StringRef AsmStr,
|
void lexAndCheckTokens(StringRef AsmStr,
|
||||||
@ -116,6 +114,9 @@ TEST_F(SystemZAsmLexerTest, CheckDontRestrictCommentStringToStartOfStatement) {
|
|||||||
// Setup.
|
// Setup.
|
||||||
setupCallToAsmParser(AsmStr);
|
setupCallToAsmParser(AsmStr);
|
||||||
|
|
||||||
|
// Lex initially to get the string.
|
||||||
|
Parser->getLexer().Lex();
|
||||||
|
|
||||||
SmallVector<AsmToken::TokenKind> ExpectedTokens(
|
SmallVector<AsmToken::TokenKind> ExpectedTokens(
|
||||||
{AsmToken::Identifier, AsmToken::EndOfStatement});
|
{AsmToken::Identifier, AsmToken::EndOfStatement});
|
||||||
lexAndCheckTokens(AsmStr /* "jne #-4" */, ExpectedTokens);
|
lexAndCheckTokens(AsmStr /* "jne #-4" */, ExpectedTokens);
|
||||||
@ -129,6 +130,9 @@ TEST_F(SystemZAsmLexerTest, CheckRestrictCommentStringToStartOfStatement) {
|
|||||||
MUPMAI->setRestrictCommentStringToStartOfStatement(true);
|
MUPMAI->setRestrictCommentStringToStartOfStatement(true);
|
||||||
setupCallToAsmParser(AsmStr);
|
setupCallToAsmParser(AsmStr);
|
||||||
|
|
||||||
|
// Lex initially to get the string.
|
||||||
|
Parser->getLexer().Lex();
|
||||||
|
|
||||||
// When we are restricting the comment string to only the start of the
|
// When we are restricting the comment string to only the start of the
|
||||||
// statement, the sequence of tokens we are expecting is: Identifier - "jne"
|
// statement, the sequence of tokens we are expecting is: Identifier - "jne"
|
||||||
// Hash - '#'
|
// Hash - '#'
|
||||||
@ -148,8 +152,65 @@ TEST_F(SystemZAsmLexerTest, CheckHLASMComment) {
|
|||||||
MUPMAI->setCommentString("*");
|
MUPMAI->setCommentString("*");
|
||||||
setupCallToAsmParser(AsmStr);
|
setupCallToAsmParser(AsmStr);
|
||||||
|
|
||||||
|
// Lex initially to get the string.
|
||||||
|
Parser->getLexer().Lex();
|
||||||
|
|
||||||
SmallVector<AsmToken::TokenKind> ExpectedTokens(
|
SmallVector<AsmToken::TokenKind> ExpectedTokens(
|
||||||
{AsmToken::EndOfStatement, AsmToken::Eof});
|
{AsmToken::EndOfStatement, AsmToken::Eof});
|
||||||
lexAndCheckTokens(AsmStr /* "* lhi 1,10" */, ExpectedTokens);
|
lexAndCheckTokens(AsmStr /* "* lhi 1,10" */, ExpectedTokens);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(SystemZAsmLexerTest, CheckHashDefault) {
|
||||||
|
StringRef AsmStr = "lh#123";
|
||||||
|
|
||||||
|
// Setup.
|
||||||
|
setupCallToAsmParser(AsmStr);
|
||||||
|
|
||||||
|
// Lex initially to get the string.
|
||||||
|
Parser->getLexer().Lex();
|
||||||
|
|
||||||
|
// "lh" -> Identifier
|
||||||
|
// "#123" -> EndOfStatement (Lexed as a comment since CommentString is "#")
|
||||||
|
SmallVector<AsmToken::TokenKind> ExpectedTokens(
|
||||||
|
{AsmToken::Identifier, AsmToken::EndOfStatement, AsmToken::Eof});
|
||||||
|
lexAndCheckTokens(AsmStr, ExpectedTokens);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test if "#" is accepted as part of an Identifier
|
||||||
|
TEST_F(SystemZAsmLexerTest, CheckAllowHashInIdentifier) {
|
||||||
|
StringRef AsmStr = "lh#123";
|
||||||
|
|
||||||
|
// Setup.
|
||||||
|
setupCallToAsmParser(AsmStr);
|
||||||
|
Parser->getLexer().setAllowHashInIdentifier(true);
|
||||||
|
|
||||||
|
// Lex initially to get the string.
|
||||||
|
Parser->getLexer().Lex();
|
||||||
|
|
||||||
|
// "lh#123" -> Identifier
|
||||||
|
SmallVector<AsmToken::TokenKind> ExpectedTokens(
|
||||||
|
{AsmToken::Identifier, AsmToken::EndOfStatement, AsmToken::Eof});
|
||||||
|
lexAndCheckTokens(AsmStr, ExpectedTokens);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(SystemZAsmLexerTest, CheckAllowHashInIdentifier2) {
|
||||||
|
StringRef AsmStr = "lh#12*3";
|
||||||
|
|
||||||
|
// Setup.
|
||||||
|
MUPMAI->setCommentString("*");
|
||||||
|
MUPMAI->setRestrictCommentStringToStartOfStatement(true);
|
||||||
|
setupCallToAsmParser(AsmStr);
|
||||||
|
Parser->getLexer().setAllowHashInIdentifier(true);
|
||||||
|
|
||||||
|
// Lex initially to get the string.
|
||||||
|
Parser->getLexer().Lex();
|
||||||
|
|
||||||
|
// "lh#12" -> Identifier
|
||||||
|
// "*" -> Star
|
||||||
|
// "3" -> Integer
|
||||||
|
SmallVector<AsmToken::TokenKind> ExpectedTokens(
|
||||||
|
{AsmToken::Identifier, AsmToken::Star, AsmToken::Integer,
|
||||||
|
AsmToken::EndOfStatement, AsmToken::Eof});
|
||||||
|
lexAndCheckTokens(AsmStr, ExpectedTokens);
|
||||||
|
}
|
||||||
} // end anonymous namespace
|
} // end anonymous namespace
|
||||||
|
Loading…
Reference in New Issue
Block a user