2021-03-24 15:15:15 +01:00
|
|
|
//===- llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp ----------------===//
|
|
|
|
//
|
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
|
|
//
|
|
|
|
//===--------------------------------------------------------------------===//
|
|
|
|
#include "llvm/MC/MCAsmInfo.h"
|
|
|
|
#include "llvm/MC/MCContext.h"
|
|
|
|
#include "llvm/MC/MCObjectFileInfo.h"
|
|
|
|
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
|
|
|
|
#include "llvm/MC/MCRegisterInfo.h"
|
|
|
|
#include "llvm/MC/MCStreamer.h"
|
|
|
|
#include "llvm/Support/MemoryBuffer.h"
|
|
|
|
#include "llvm/Support/SourceMgr.h"
|
|
|
|
#include "llvm/Support/TargetRegistry.h"
|
|
|
|
#include "llvm/Support/TargetSelect.h"
|
|
|
|
|
|
|
|
#include "gtest/gtest.h"
|
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
// Come up with our hacked version of MCAsmInfo.
|
|
|
|
// This hacked version derives from the main MCAsmInfo instance.
|
|
|
|
// Here, we're free to override whatever we want, without polluting
|
|
|
|
// the main MCAsmInfo interface.
|
|
|
|
class MockedUpMCAsmInfo : public MCAsmInfo {
|
|
|
|
public:
|
|
|
|
void setRestrictCommentStringToStartOfStatement(bool Value) {
|
|
|
|
RestrictCommentStringToStartOfStatement = Value;
|
|
|
|
}
|
|
|
|
void setCommentString(StringRef Value) { CommentString = Value; }
|
[AsmParser][SystemZ][z/OS] Add in support to allow use of additional comment strings.
- Currently, MCAsmInfo provides a CommentString attribute, that various targets can set, so that the AsmLexer can appropriately lex a string as a comment based on the set value of the attribute.
- However, AsmLexer also supports a few additional comment syntaxes, in addition to what's specified as a CommentString attribute. This includes regular C-style block comments (/* ... */), regular C-style line comments (// .... ) and #. While I'm not sure as to why this behaviour exists, I am assuming it does to maintain backward compatibility with GNU AS (see https://sourceware.org/binutils/docs/as/Comments.html#Comments for reference)
For example:
Consider a target which sets the CommentString attribute to '*'.
The following strings are all lexed as comments.
```
"# abc" -> comment
"// abc" -> comment
"/* abc */ -> comment
"* abc" -> comment
```
- In HLASM however, only "*" is accepted as a comment string, and nothing else.
- To achieve this, an additional attribute (`AllowAdditionalComments`) has been added to MCAsmInfo. If this attribute is set to false, then only the string specified by the CommentString attribute is used as a possible comment string to be lexed by the AsmLexer. The regular C-style block comments, line comments and "#" are disabled. As a final note, "#" will still be treated as a comment, if the CommentString attribute is set to "#".
Depends on https://reviews.llvm.org/D99277
Reviewed By: abhina.sreeskantharajan, myiwanch
Differential Revision: https://reviews.llvm.org/D99286
2021-04-13 17:07:46 +02:00
|
|
|
void setAllowAdditionalComments(bool Value) {
|
|
|
|
AllowAdditionalComments = Value;
|
|
|
|
}
|
[AsmParser][ms][X86] Fix possible misbehaviour in parsing of special tokens at start of string.
- Previously, https://reviews.llvm.org/D72680 introduced a new attribute called `AllowSymbolAtNameStart` (in relation to the MAsmParser changes) in `MCAsmInfo.h` which (according to the comment in the header) allows the following behaviour:
```
/// This is true if the assembler allows $ @ ? characters at the start of
/// symbol names. Defaults to false.
```
- However, the usage of this field in AsmLexer.cpp doesn't seem completely accurate* for a couple of reasons.
```
default:
if (MAI.doesAllowSymbolAtNameStart()) {
// Handle Microsoft-style identifier: [a-zA-Z_$.@?][a-zA-Z0-9_$.@#?]*
if (!isDigit(CurChar) &&
isIdentifierChar(CurChar, MAI.doesAllowAtInName(),
AllowHashInIdentifier))
return LexIdentifier();
}
```
1. The Dollar and At tokens, when occurring at the start of the string, are treated as separate tokens (AsmToken::Dollar and AsmToken::At respectively) and not lexed as an Identifier.
2. I'm not too sure why `MAI.doesAllowAtInName()` is used when `AllowAtInIdentifier` could be used. For X86 platforms, afaict, this shouldn't be an issue, since the `CommentString` attribute isn't "@". (alternatively the call to the setter can be set anywhere else as needed). The `AllowAtInName` does have an additional important meaning, but in the context of AsmLexer, shouldn't mean anything different compared to `AllowAtInIdentifier`
My proposal is the following:
- Introduce 3 new fields called `AllowQuestionTokenAtStartOfString`, `AllowDollarTokenAtStartOfString` and `AllowAtTokenAtStartOfString` in MCAsmInfo.h which will encapsulate the previously documented behaviour of "allowing $, @, ? characters at the start of symbol names")
- Introduce these fields where "$", "@" are lexed, and treat them as identifiers depending on whether `Allow[Dollar|At]TokenAtStartOfString` is set.
- For the sole case of "?", append it to the existing logic for treating a "default" token as an Identifier.
z/OS (HLASM) will also make use of some of these fields in follow up patches.
completely accurate* - This was based on the comments and the intended behaviour the code. I might have completely misinterpreted it, and if that is the case my sincere apologies. We can close this patch if necessary, if there are no changes to be made :)
Depends on https://reviews.llvm.org/D99374
Reviewed By: Jonathan.Crowther
Differential Revision: https://reviews.llvm.org/D99889
2021-04-21 16:19:52 +02:00
|
|
|
void setAllowQuestionAtStartOfIdentifier(bool Value) {
|
|
|
|
AllowQuestionAtStartOfIdentifier = Value;
|
|
|
|
}
|
|
|
|
void setAllowAtAtStartOfIdentifier(bool Value) {
|
|
|
|
AllowAtAtStartOfIdentifier = Value;
|
|
|
|
}
|
|
|
|
void setAllowDollarAtStartOfIdentifier(bool Value) {
|
|
|
|
AllowDollarAtStartOfIdentifier = Value;
|
|
|
|
}
|
[AsmParser][SystemZ][z/OS] Use updated framework in AsmLexer to accept special tokens as Identifiers
- Previously, https://reviews.llvm.org/D99889 changed the framework in the AsmLexer to treat special tokens, if they occur at the start of the string, as Identifiers.
- These are used by the MASM Parser implementation in LLVM, and we can extend some of the changes made in the previous patch to SystemZ.
- In SystemZ, the special "tokens" referred to here are "_", "$", "@", "#". [_|$|@|#] are already supported as "part" of an Identifier.
- The changes in this patch ensure that these special tokens, when they occur at the start of the Identifier, are treated as Identifiers.
Reviewed By: abhina.sreeskantharajan
Differential Revision: https://reviews.llvm.org/D100959
2021-04-28 21:42:23 +02:00
|
|
|
void setAllowHashAtStartOfIdentifier(bool Value) {
|
|
|
|
AllowHashAtStartOfIdentifier = Value;
|
|
|
|
}
|
2021-04-29 17:27:56 +02:00
|
|
|
void setAllowDotIsPC(bool Value) { DotIsPC = Value; }
|
2021-05-26 16:36:50 +02:00
|
|
|
void setAssemblerDialect(unsigned Value) { AssemblerDialect = Value; }
|
2021-06-24 18:49:38 +02:00
|
|
|
void setEmitLabelsInUpperCase(bool Value) { EmitLabelsInUpperCase = Value; }
|
2021-03-24 15:15:15 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
// Setup a testing class that the GTest framework can call.
|
|
|
|
class SystemZAsmLexerTest : public ::testing::Test {
|
|
|
|
protected:
|
|
|
|
static void SetUpTestCase() {
|
|
|
|
LLVMInitializeSystemZTargetInfo();
|
|
|
|
LLVMInitializeSystemZTargetMC();
|
2021-04-29 17:27:56 +02:00
|
|
|
LLVMInitializeSystemZAsmParser();
|
2021-03-24 15:15:15 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
std::unique_ptr<MCRegisterInfo> MRI;
|
|
|
|
std::unique_ptr<MockedUpMCAsmInfo> MUPMAI;
|
|
|
|
std::unique_ptr<const MCInstrInfo> MII;
|
2021-05-23 23:15:23 +02:00
|
|
|
std::unique_ptr<MCObjectFileInfo> MOFI;
|
2021-03-24 15:15:15 +01:00
|
|
|
std::unique_ptr<MCStreamer> Str;
|
|
|
|
std::unique_ptr<MCAsmParser> Parser;
|
|
|
|
std::unique_ptr<MCContext> Ctx;
|
2021-04-29 17:27:56 +02:00
|
|
|
std::unique_ptr<MCSubtargetInfo> STI;
|
|
|
|
std::unique_ptr<MCTargetAsmParser> TargetAsmParser;
|
2021-03-24 15:15:15 +01:00
|
|
|
|
|
|
|
SourceMgr SrcMgr;
|
|
|
|
std::string TripleName;
|
|
|
|
llvm::Triple Triple;
|
|
|
|
const Target *TheTarget;
|
|
|
|
|
|
|
|
const MCTargetOptions MCOptions;
|
|
|
|
|
|
|
|
SystemZAsmLexerTest() {
|
|
|
|
// We will use the SystemZ triple, because of missing
|
|
|
|
// Object File and Streamer support for the z/OS target.
|
|
|
|
TripleName = "s390x-ibm-linux";
|
|
|
|
Triple = llvm::Triple(TripleName);
|
|
|
|
|
|
|
|
std::string Error;
|
|
|
|
TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
|
|
|
|
EXPECT_NE(TheTarget, nullptr);
|
|
|
|
|
|
|
|
MRI.reset(TheTarget->createMCRegInfo(TripleName));
|
|
|
|
EXPECT_NE(MRI, nullptr);
|
|
|
|
|
2021-04-29 17:27:56 +02:00
|
|
|
MII.reset(TheTarget->createMCInstrInfo());
|
|
|
|
EXPECT_NE(MII, nullptr);
|
|
|
|
|
|
|
|
STI.reset(TheTarget->createMCSubtargetInfo(TripleName, "z10", ""));
|
|
|
|
EXPECT_NE(STI, nullptr);
|
|
|
|
|
2021-03-24 15:15:15 +01:00
|
|
|
std::unique_ptr<MCAsmInfo> MAI;
|
|
|
|
MAI.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
|
|
|
|
EXPECT_NE(MAI, nullptr);
|
|
|
|
|
|
|
|
// Now we cast to our mocked up version of MCAsmInfo.
|
|
|
|
MUPMAI.reset(static_cast<MockedUpMCAsmInfo *>(MAI.release()));
|
|
|
|
// MUPMAI should "hold" MAI.
|
|
|
|
EXPECT_NE(MUPMAI, nullptr);
|
|
|
|
// After releasing, MAI should now be null.
|
|
|
|
EXPECT_EQ(MAI, nullptr);
|
|
|
|
}
|
|
|
|
|
|
|
|
void setupCallToAsmParser(StringRef AsmStr) {
|
|
|
|
std::unique_ptr<MemoryBuffer> Buffer(MemoryBuffer::getMemBuffer(AsmStr));
|
|
|
|
SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc());
|
|
|
|
EXPECT_EQ(Buffer, nullptr);
|
|
|
|
|
2021-05-23 23:15:23 +02:00
|
|
|
Ctx.reset(new MCContext(Triple, MUPMAI.get(), MRI.get(), STI.get(), &SrcMgr,
|
|
|
|
&MCOptions));
|
|
|
|
MOFI.reset(TheTarget->createMCObjectFileInfo(*Ctx, /*PIC=*/false,
|
|
|
|
/*LargeCodeModel=*/false));
|
|
|
|
Ctx->setObjectFileInfo(MOFI.get());
|
2021-03-24 15:15:15 +01:00
|
|
|
|
|
|
|
Str.reset(TheTarget->createNullStreamer(*Ctx));
|
|
|
|
|
|
|
|
Parser.reset(createMCAsmParser(SrcMgr, *Ctx, *Str, *MUPMAI));
|
2021-04-29 17:27:56 +02:00
|
|
|
|
|
|
|
TargetAsmParser.reset(
|
|
|
|
TheTarget->createMCAsmParser(*STI, *Parser, *MII, MCOptions));
|
|
|
|
Parser->setTargetParser(*TargetAsmParser);
|
2021-03-24 15:15:15 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
void lexAndCheckTokens(StringRef AsmStr,
|
|
|
|
SmallVector<AsmToken::TokenKind> ExpectedTokens) {
|
|
|
|
// Get reference to AsmLexer.
|
|
|
|
MCAsmLexer &Lexer = Parser->getLexer();
|
|
|
|
// Loop through all expected tokens checking one by one.
|
|
|
|
for (size_t I = 0; I < ExpectedTokens.size(); ++I) {
|
|
|
|
EXPECT_EQ(Lexer.getTok().getKind(), ExpectedTokens[I]);
|
|
|
|
Lexer.Lex();
|
|
|
|
}
|
|
|
|
}
|
2021-04-13 21:25:00 +02:00
|
|
|
|
|
|
|
void lexAndCheckIntegerTokensAndValues(StringRef AsmStr,
|
|
|
|
SmallVector<int64_t> ExpectedValues) {
|
|
|
|
// Get reference to AsmLexer.
|
|
|
|
MCAsmLexer &Lexer = Parser->getLexer();
|
|
|
|
// Loop through all expected tokens and expected values.
|
|
|
|
for (size_t I = 0; I < ExpectedValues.size(); ++I) {
|
|
|
|
// Skip any EndOfStatement tokens, we're not concerned with them.
|
|
|
|
if (Lexer.getTok().getKind() == AsmToken::EndOfStatement)
|
|
|
|
continue;
|
|
|
|
EXPECT_EQ(Lexer.getTok().getKind(), AsmToken::Integer);
|
|
|
|
EXPECT_EQ(Lexer.getTok().getIntVal(), ExpectedValues[I]);
|
|
|
|
Lexer.Lex();
|
|
|
|
}
|
|
|
|
}
|
2021-03-24 15:15:15 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
TEST_F(SystemZAsmLexerTest, CheckDontRestrictCommentStringToStartOfStatement) {
  // With the comment string not restricted to the start of a statement,
  // only "jne" is expected as a token before the statement ends.
  const StringRef Source = "jne #-4";
  setupCallToAsmParser(Source);

  // Prime the lexer so getTok() returns the first token.
  MCAsmLexer &Lexer = Parser->getLexer();
  Lexer.Lex();

  SmallVector<AsmToken::TokenKind> Expected;
  Expected.push_back(AsmToken::Identifier);
  Expected.push_back(AsmToken::EndOfStatement);
  lexAndCheckTokens(Source, Expected);
}
|
|
|
|
|
|
|
|
// Testing MCAsmInfo's RestrictCommentStringToStartOfStatement attribute.
|
|
|
|
TEST_F(SystemZAsmLexerTest, CheckRestrictCommentStringToStartOfStatement) {
  const StringRef Source = "jne #-4";

  // The comment string is only honoured at the start of a statement.
  MUPMAI->setRestrictCommentStringToStartOfStatement(true);
  setupCallToAsmParser(Source);

  // Prime the lexer so getTok() returns the first token.
  MCAsmLexer &Lexer = Parser->getLexer();
  Lexer.Lex();

  // The '#' is no longer at the start of the statement, so every piece of
  // the operand is expected as a token of its own.
  SmallVector<AsmToken::TokenKind> Expected;
  Expected.push_back(AsmToken::Identifier); // "jne"
  Expected.push_back(AsmToken::Hash);       // "#"
  Expected.push_back(AsmToken::Minus);      // "-"
  Expected.push_back(AsmToken::Integer);    // "4"
  lexAndCheckTokens(Source, Expected);
}
|
|
|
|
|
|
|
|
// Test HLASM Comment Syntax ('*')
|
|
|
|
TEST_F(SystemZAsmLexerTest, CheckHLASMComment) {
  const StringRef Source = "* lhi 1,10";

  // Use the HLASM comment string.
  MUPMAI->setCommentString("*");
  setupCallToAsmParser(Source);

  // Prime the lexer so getTok() returns the first token.
  MCAsmLexer &Lexer = Parser->getLexer();
  Lexer.Lex();

  // The whole line is expected to be consumed as a comment.
  SmallVector<AsmToken::TokenKind> Expected;
  Expected.push_back(AsmToken::EndOfStatement);
  Expected.push_back(AsmToken::Eof);
  lexAndCheckTokens(Source, Expected);
}
|
2021-04-01 16:38:42 +02:00
|
|
|
|
|
|
|
TEST_F(SystemZAsmLexerTest, CheckHashDefault) {
  const StringRef Source = "lh#123";
  setupCallToAsmParser(Source);

  // Prime the lexer so getTok() returns the first token.
  MCAsmLexer &Lexer = Parser->getLexer();
  Lexer.Lex();

  SmallVector<AsmToken::TokenKind> Expected;
  Expected.push_back(AsmToken::Identifier); // "lh"
  // "#123" is lexed as a comment since CommentString is "#".
  Expected.push_back(AsmToken::EndOfStatement);
  Expected.push_back(AsmToken::Eof);
  lexAndCheckTokens(Source, Expected);
}
|
|
|
|
|
|
|
|
// Test if "#" is accepted as an Identifier
|
|
|
|
TEST_F(SystemZAsmLexerTest, CheckAllowHashInIdentifier) {
  const StringRef Source = "lh#123";
  setupCallToAsmParser(Source);

  // Let the lexer treat '#' as part of an identifier, then prime it.
  MCAsmLexer &Lexer = Parser->getLexer();
  Lexer.setAllowHashInIdentifier(true);
  Lexer.Lex();

  SmallVector<AsmToken::TokenKind> Expected;
  Expected.push_back(AsmToken::Identifier); // "lh#123"
  Expected.push_back(AsmToken::EndOfStatement);
  Expected.push_back(AsmToken::Eof);
  lexAndCheckTokens(Source, Expected);
}
|
|
|
|
|
|
|
|
TEST_F(SystemZAsmLexerTest, CheckAllowHashInIdentifier2) {
  const StringRef Source = "lh#12*3";

  // '*' is the comment string, but only at the start of a statement.
  MUPMAI->setCommentString("*");
  MUPMAI->setRestrictCommentStringToStartOfStatement(true);
  setupCallToAsmParser(Source);

  // Let the lexer treat '#' as part of an identifier, then prime it.
  MCAsmLexer &Lexer = Parser->getLexer();
  Lexer.setAllowHashInIdentifier(true);
  Lexer.Lex();

  SmallVector<AsmToken::TokenKind> Expected;
  Expected.push_back(AsmToken::Identifier); // "lh#12"
  Expected.push_back(AsmToken::Star);       // "*"
  Expected.push_back(AsmToken::Integer);    // "3"
  Expected.push_back(AsmToken::EndOfStatement);
  Expected.push_back(AsmToken::Eof);
  lexAndCheckTokens(Source, Expected);
}
|
[AsmParser][SystemZ][z/OS] Add in support to allow use of additional comment strings.
- Currently, MCAsmInfo provides a CommentString attribute, that various targets can set, so that the AsmLexer can appropriately lex a string as a comment based on the set value of the attribute.
- However, AsmLexer also supports a few additional comment syntaxes, in addition to what's specified as a CommentString attribute. This includes regular C-style block comments (/* ... */), regular C-style line comments (// .... ) and #. While I'm not sure as to why this behaviour exists, I am assuming it does to maintain backward compatibility with GNU AS (see https://sourceware.org/binutils/docs/as/Comments.html#Comments for reference)
For example:
Consider a target which sets the CommentString attribute to '*'.
The following strings are all lexed as comments.
```
"# abc" -> comment
"// abc" -> comment
"/* abc */" -> comment
"* abc" -> comment
```
- In HLASM however, only "*" is accepted as a comment string, and nothing else.
- To achieve this, an additional attribute (`AllowAdditionalComments`) has been added to MCAsmInfo. If this attribute is set to false, then only the string specified by the CommentString attribute is used as a possible comment string to be lexed by the AsmLexer. The regular C-style block comments, line comments and "#" are disabled. As a final note, "#" will still be treated as a comment, if the CommentString attribute is set to "#".
Depends on https://reviews.llvm.org/D99277
Reviewed By: abhina.sreeskantharajan, myiwanch
Differential Revision: https://reviews.llvm.org/D99286
2021-04-13 17:07:46 +02:00
|
|
|
|
|
|
|
TEST_F(SystemZAsmLexerTest, DontCheckStrictCommentString) {
  // Additional comment syntaxes are left enabled (the default).
  const StringRef Source = "# abc\n/* def */// xyz";
  setupCallToAsmParser(Source);

  // Prime the lexer so getTok() returns the first token.
  MCAsmLexer &Lexer = Parser->getLexer();
  Lexer.Lex();

  SmallVector<AsmToken::TokenKind> Expected;
  Expected.push_back(AsmToken::EndOfStatement);
  Expected.push_back(AsmToken::Comment);
  Expected.push_back(AsmToken::EndOfStatement);
  Expected.push_back(AsmToken::Eof);
  lexAndCheckTokens(Source, Expected);
}
|
|
|
|
|
|
|
|
TEST_F(SystemZAsmLexerTest, DontCheckStrictCommentString2) {
  const StringRef Source = "# abc\n/* def */// xyz\n* rst";

  // '*' becomes the comment string; additional comment syntaxes stay enabled.
  MUPMAI->setCommentString("*");
  setupCallToAsmParser(Source);

  // Prime the lexer so getTok() returns the first token.
  MCAsmLexer &Lexer = Parser->getLexer();
  Lexer.Lex();

  SmallVector<AsmToken::TokenKind> Expected;
  Expected.push_back(AsmToken::EndOfStatement);
  Expected.push_back(AsmToken::Comment);
  Expected.push_back(AsmToken::EndOfStatement);
  Expected.push_back(AsmToken::EndOfStatement);
  Expected.push_back(AsmToken::Eof);
  lexAndCheckTokens(Source, Expected);
}
|
|
|
|
|
|
|
|
TEST_F(SystemZAsmLexerTest, CheckStrictCommentString) {
  const StringRef Source = "# abc\n/* def */// xyz";

  // Only the CommentString itself may introduce a comment.
  MUPMAI->setAllowAdditionalComments(false);
  setupCallToAsmParser(Source);

  // Prime the lexer so getTok() returns the first token.
  MCAsmLexer &Lexer = Parser->getLexer();
  Lexer.Lex();

  // "# abc" is still treated as a comment, since CommentString is set to
  // "#". The C-style comment markers are expected as ordinary tokens:
  //   EndOfStatement                         <- "# abc\n"
  //   Slash Star Identifier Star Slash       <- "/* def */"
  //   Slash Slash Identifier                 <- "// xyz"
  SmallVector<AsmToken::TokenKind> Expected(
      {AsmToken::EndOfStatement, AsmToken::Slash, AsmToken::Star,
       AsmToken::Identifier, AsmToken::Star, AsmToken::Slash, AsmToken::Slash,
       AsmToken::Slash, AsmToken::Identifier, AsmToken::EndOfStatement,
       AsmToken::Eof});
  lexAndCheckTokens(Source, Expected);
}
|
|
|
|
|
|
|
|
TEST_F(SystemZAsmLexerTest, CheckStrictCommentString2) {
  const StringRef Source = "// abc";

  // Additional comments are disabled, but "//" is made the CommentString.
  MUPMAI->setAllowAdditionalComments(false);
  MUPMAI->setCommentString("//");
  setupCallToAsmParser(Source);

  // Prime the lexer so getTok() returns the first token.
  MCAsmLexer &Lexer = Parser->getLexer();
  Lexer.Lex();

  // "// abc" is still a comment because "//" is the CommentString.
  SmallVector<AsmToken::TokenKind> Expected;
  Expected.push_back(AsmToken::EndOfStatement);
  Expected.push_back(AsmToken::Eof);
  lexAndCheckTokens(Source, Expected);
}
|
|
|
|
|
|
|
|
TEST_F(SystemZAsmLexerTest, CheckStrictCommentString3) {
  const StringRef Source = "/* abc */";

  // Only the CommentString itself may introduce a comment.
  MUPMAI->setAllowAdditionalComments(false);
  setupCallToAsmParser(Source);

  // Prime the lexer so getTok() returns the first token.
  MCAsmLexer &Lexer = Parser->getLexer();
  Lexer.Lex();

  // The block-comment markers are expected as ordinary tokens.
  SmallVector<AsmToken::TokenKind> Expected(
      {AsmToken::Slash, AsmToken::Star, AsmToken::Identifier, AsmToken::Star,
       AsmToken::Slash, AsmToken::EndOfStatement, AsmToken::Eof});
  lexAndCheckTokens(Source, Expected);
}
|
|
|
|
|
|
|
|
TEST_F(SystemZAsmLexerTest, CheckStrictCommentString4) {
  const StringRef Source = "# abc\n/* def */// xyz";

  // '*' is the only comment string, and only at the start of a statement.
  MUPMAI->setCommentString("*");
  MUPMAI->setAllowAdditionalComments(false);
  MUPMAI->setRestrictCommentStringToStartOfStatement(true);
  setupCallToAsmParser(Source);

  // Prime the lexer so getTok() returns the first token.
  MCAsmLexer &Lexer = Parser->getLexer();
  Lexer.Lex();

  // Expected token stream:
  //   Hash Identifier EndOfStatement         <- "# abc\n"
  //   Slash Star Identifier Star Slash       <- "/* def */"
  //   Slash Slash Identifier                 <- "// xyz"
  SmallVector<AsmToken::TokenKind> Expected(
      {AsmToken::Hash, AsmToken::Identifier, AsmToken::EndOfStatement,
       AsmToken::Slash, AsmToken::Star, AsmToken::Identifier, AsmToken::Star,
       AsmToken::Slash, AsmToken::Slash, AsmToken::Slash, AsmToken::Identifier,
       AsmToken::EndOfStatement, AsmToken::Eof});
  lexAndCheckTokens(Source, Expected);
}
|
|
|
|
|
|
|
|
TEST_F(SystemZAsmLexerTest, CheckStrictCommentString5) {
  const StringRef Source = "#abc\n/* def */// xyz";

  // '*' is the only comment string; it is not restricted to the start of a
  // statement in this test.
  MUPMAI->setCommentString("*");
  MUPMAI->setAllowAdditionalComments(false);
  setupCallToAsmParser(Source);

  // Prime the lexer so getTok() returns the first token.
  MCAsmLexer &Lexer = Parser->getLexer();
  Lexer.Lex();

  // Expected token stream:
  //   Hash Identifier EndOfStatement   <- "#abc\n"
  //   Slash                            <- "/"
  //   EndOfStatement                   <- "* def */// xyz"
  SmallVector<AsmToken::TokenKind> Expected(
      {AsmToken::Hash, AsmToken::Identifier, AsmToken::EndOfStatement,
       AsmToken::Slash, AsmToken::EndOfStatement, AsmToken::Eof});
  lexAndCheckTokens(Source, Expected);
}
|
2021-04-13 21:25:00 +02:00
|
|
|
|
|
|
|
TEST_F(SystemZAsmLexerTest, CheckValidHLASMIntegers) {
  // One decimal integer per line, some of them with leading zeros.
  const StringRef Source = "123\n000123\n1999\n007\n12300\n12021\n";
  setupCallToAsmParser(Source);

  // Switch the lexer to HLASM integer lexing, then prime it.
  MCAsmLexer &Lexer = Parser->getLexer();
  Lexer.setLexHLASMIntegers(true);
  Lexer.Lex();

  SmallVector<int64_t> Values;
  Values.push_back(123);
  Values.push_back(123);
  Values.push_back(1999);
  Values.push_back(7);
  Values.push_back(12300);
  Values.push_back(12021);
  lexAndCheckIntegerTokensAndValues(Source, Values);
}
|
|
|
|
|
|
|
|
TEST_F(SystemZAsmLexerTest, CheckInvalidHLASMIntegers) {
  const StringRef Source = "0b0101\n0xDEADBEEF\nfffh\n.133\n";
  setupCallToAsmParser(Source);

  // Switch the lexer to HLASM integer lexing, then prime it.
  MCAsmLexer &Lexer = Parser->getLexer();
  Lexer.setLexHLASMIntegers(true);
  Lexer.Lex();

  // Expected token stream, one input line per row:
  //   Integer("0") Identifier("b0101")     EndOfStatement
  //   Integer("0") Identifier("xDEADBEEF") EndOfStatement
  //   Identifier("fffh")                   EndOfStatement
  //   Real(".133")                         EndOfStatement
  SmallVector<AsmToken::TokenKind> Expected(
      {AsmToken::Integer, AsmToken::Identifier, AsmToken::EndOfStatement,
       AsmToken::Integer, AsmToken::Identifier, AsmToken::EndOfStatement,
       AsmToken::Identifier, AsmToken::EndOfStatement, AsmToken::Real,
       AsmToken::EndOfStatement, AsmToken::Eof});
  lexAndCheckTokens(Source, Expected);
}
|
|
|
|
|
|
|
|
TEST_F(SystemZAsmLexerTest, CheckDefaultIntegers) {
  // Binary and hexadecimal prefixes are accepted by the default integer
  // lexing. An 'h'-suffixed spelling such as "fffh" is deliberately not
  // listed: it starts with a letter and therefore lexes as an Identifier
  // (see CheckInvalidHLASMIntegers), so it can never satisfy an Integer
  // expectation.
  StringRef AsmStr = "0b0101\n0xDEADBEEF\n";

  // Setup.
  setupCallToAsmParser(AsmStr);

  // Lex initially to get the string.
  Parser->getLexer().Lex();

  SmallVector<int64_t> ExpectedValues({5, 0xDEADBEEF});
  lexAndCheckIntegerTokensAndValues(AsmStr, ExpectedValues);
}
|
|
|
|
|
|
|
|
TEST_F(SystemZAsmLexerTest, CheckDefaultFloats) {
  // Four floating-point literals, each on a line of its own.
  const StringRef Source = "0.333\n1.3\n2.5\n3.0\n";
  setupCallToAsmParser(Source);

  // Prime the lexer so getTok() returns the first token.
  MCAsmLexer &Lexer = Parser->getLexer();
  Lexer.Lex();

  // Every line contributes a Real token followed by its EndOfStatement.
  SmallVector<AsmToken::TokenKind> Expected;
  for (int Line = 0; Line != 4; ++Line) {
    Expected.push_back(AsmToken::Real);
    Expected.push_back(AsmToken::EndOfStatement);
  }
  Expected.push_back(AsmToken::Eof);

  lexAndCheckTokens(Source, Expected);
}
|
[AsmParser][ms][X86] Fix possible misbehaviour in parsing of special tokens at start of string.
- Previously, https://reviews.llvm.org/D72680 introduced a new attribute called `AllowSymbolAtNameStart` (in relation to the MAsmParser changes) in `MCAsmInfo.h` which (according to the comment in the header) allows the following behaviour:
```
/// This is true if the assembler allows $ @ ? characters at the start of
/// symbol names. Defaults to false.
```
- However, the usage of this field in AsmLexer.cpp doesn't seem completely accurate* for a couple of reasons.
```
default:
if (MAI.doesAllowSymbolAtNameStart()) {
// Handle Microsoft-style identifier: [a-zA-Z_$.@?][a-zA-Z0-9_$.@#?]*
if (!isDigit(CurChar) &&
isIdentifierChar(CurChar, MAI.doesAllowAtInName(),
AllowHashInIdentifier))
return LexIdentifier();
}
```
1. The Dollar and At tokens, when occurring at the start of the string, are treated as separate tokens (AsmToken::Dollar and AsmToken::At respectively) and not lexed as an Identifier.
2. I'm not too sure why `MAI.doesAllowAtInName()` is used when `AllowAtInIdentifier` could be used. For X86 platforms, afaict, this shouldn't be an issue, since the `CommentString` attribute isn't "@". (alternatively the call to the setter can be set anywhere else as needed). The `AllowAtInName` does have an additional important meaning, but in the context of AsmLexer, shouldn't mean anything different compared to `AllowAtInIdentifier`
My proposal is the following:
- Introduce 3 new fields called `AllowQuestionTokenAtStartOfString`, `AllowDollarTokenAtStartOfString` and `AllowAtTokenAtStartOfString` in MCAsmInfo.h which will encapsulate the previously documented behaviour of "allowing $, @, ? characters at the start of symbol names")
- Introduce these fields where "$", "@" are lexed, and treat them as identifiers depending on whether `Allow[Dollar|At]TokenAtStartOfString` is set.
- For the sole case of "?", append it to the existing logic for treating a "default" token as an Identifier.
z/OS (HLASM) will also make use of some of these fields in follow up patches.
completely accurate* - This was based on the comments and the intended behaviour of the code. I might have completely misinterpreted it, and if that is the case my sincere apologies. We can close this patch if necessary, if there are no changes to be made :)
Depends on https://reviews.llvm.org/D99374
Reviewed By: Jonathan.Crowther
Differential Revision: https://reviews.llvm.org/D99889
2021-04-21 16:19:52 +02:00
|
|
|
|
|
|
|
TEST_F(SystemZAsmLexerTest, CheckDefaultQuestionAtStartOfIdentifier) {
  const StringRef Source = "?lh1?23";
  setupCallToAsmParser(Source);

  // Prime the lexer so getTok() returns the first token.
  MCAsmLexer &Lexer = Parser->getLexer();
  Lexer.Lex();

  // With default settings the leading '?' is expected to produce an Error
  // token, followed by a single Identifier for the rest.
  SmallVector<AsmToken::TokenKind> Expected;
  Expected.push_back(AsmToken::Error);
  Expected.push_back(AsmToken::Identifier);
  Expected.push_back(AsmToken::EndOfStatement);
  Expected.push_back(AsmToken::Eof);
  lexAndCheckTokens(Source, Expected);
}
|
|
|
|
|
|
|
|
TEST_F(SystemZAsmLexerTest, CheckAcceptQuestionAtStartOfIdentifier) {
  const StringRef Source = "?????lh1?23";

  // Permit '?' at the start of an identifier.
  MUPMAI->setAllowQuestionAtStartOfIdentifier(true);
  setupCallToAsmParser(Source);

  // Prime the lexer so getTok() returns the first token.
  MCAsmLexer &Lexer = Parser->getLexer();
  Lexer.Lex();

  // The whole string is expected as one Identifier.
  SmallVector<AsmToken::TokenKind> Expected;
  Expected.push_back(AsmToken::Identifier);
  Expected.push_back(AsmToken::EndOfStatement);
  Expected.push_back(AsmToken::Eof);
  lexAndCheckTokens(Source, Expected);
}
|
|
|
|
|
|
|
|
TEST_F(SystemZAsmLexerTest, CheckDefaultAtAtStartOfIdentifier) {
  const StringRef Source = "@@lh1?23";

  // NOTE(review): this "default '@'" test enables the '?' start-of-identifier
  // setting, not an '@' one; this looks like a carry-over from the
  // neighbouring test -- confirm whether it is needed.
  MUPMAI->setAllowQuestionAtStartOfIdentifier(true);
  setupCallToAsmParser(Source);

  // Prime the lexer so getTok() returns the first token.
  MCAsmLexer &Lexer = Parser->getLexer();
  Lexer.Lex();

  // Each leading '@' is expected as a token of its own.
  SmallVector<AsmToken::TokenKind> Expected;
  Expected.push_back(AsmToken::At);
  Expected.push_back(AsmToken::At);
  Expected.push_back(AsmToken::Identifier);
  Expected.push_back(AsmToken::EndOfStatement);
  Expected.push_back(AsmToken::Eof);
  lexAndCheckTokens(Source, Expected);
}
|
|
|
|
|
|
|
|
TEST_F(SystemZAsmLexerTest, CheckAcceptAtAtStartOfIdentifier) {
  const StringRef Source = "@@lh1?23";

  // Permit '@' at the start of an identifier.
  MUPMAI->setAllowAtAtStartOfIdentifier(true);
  setupCallToAsmParser(Source);

  // Prime the lexer so getTok() returns the first token.
  MCAsmLexer &Lexer = Parser->getLexer();
  Lexer.Lex();

  // The whole string is expected as one Identifier.
  SmallVector<AsmToken::TokenKind> Expected;
  Expected.push_back(AsmToken::Identifier);
  Expected.push_back(AsmToken::EndOfStatement);
  Expected.push_back(AsmToken::Eof);
  lexAndCheckTokens(Source, Expected);
}
|
|
|
|
|
|
|
|
TEST_F(SystemZAsmLexerTest, CheckAccpetAtAtStartOfIdentifier2) {
  const StringRef Source = "@@lj1?23";

  // '@' is both the comment string and allowed at the start of an
  // identifier.
  MUPMAI->setCommentString("@");
  MUPMAI->setAllowAtAtStartOfIdentifier(true);
  setupCallToAsmParser(Source);

  // Prime the lexer so getTok() returns the first token.
  MCAsmLexer &Lexer = Parser->getLexer();
  Lexer.Lex();

  // "@@lj1?23" is still lexed as a comment, as that takes precedence.
  SmallVector<AsmToken::TokenKind> Expected;
  Expected.push_back(AsmToken::EndOfStatement);
  Expected.push_back(AsmToken::Eof);
  lexAndCheckTokens(Source, Expected);
}
|
|
|
|
|
|
|
|
TEST_F(SystemZAsmLexerTest, CheckDefaultDollarAtStartOfIdentifier) {
  const StringRef Source = "$$ac$c";
  setupCallToAsmParser(Source);

  // Prime the lexer so getTok() returns the first token.
  MCAsmLexer &Lexer = Parser->getLexer();
  Lexer.Lex();

  // With default settings each leading '$' is expected as a token of its
  // own, followed by one Identifier for the rest.
  SmallVector<AsmToken::TokenKind> Expected;
  Expected.push_back(AsmToken::Dollar);
  Expected.push_back(AsmToken::Dollar);
  Expected.push_back(AsmToken::Identifier);
  Expected.push_back(AsmToken::EndOfStatement);
  Expected.push_back(AsmToken::Eof);
  lexAndCheckTokens(Source, Expected);
}
|
|
|
|
|
|
|
|
TEST_F(SystemZAsmLexerTest, CheckAcceptDollarAtStartOfIdentifier) {
  const StringRef Source = "$$ab$c";

  // Permit '$' at the start of an identifier.
  MUPMAI->setAllowDollarAtStartOfIdentifier(true);
  setupCallToAsmParser(Source);

  // Prime the lexer so getTok() returns the first token.
  MCAsmLexer &Lexer = Parser->getLexer();
  Lexer.Lex();

  // The whole string is expected as one Identifier.
  SmallVector<AsmToken::TokenKind> Expected;
  Expected.push_back(AsmToken::Identifier);
  Expected.push_back(AsmToken::EndOfStatement);
  Expected.push_back(AsmToken::Eof);
  lexAndCheckTokens(Source, Expected);
}
|
[AsmParser][SystemZ][z/OS] Use updated framework in AsmLexer to accept special tokens as Identifiers
- Previously, https://reviews.llvm.org/D99889 changed the framework in the AsmLexer to treat special tokens, if they occur at the start of the string, as Identifiers.
- These are used by the MASM Parser implementation in LLVM, and we can extend some of the changes made in the previous patch to SystemZ.
- In SystemZ, the special "tokens" referred to here are "_", "$", "@", "#". [_|$|@|#] are already supported as "part" of an Identifier.
- The changes in this patch ensure that these special tokens, when they occur at the start of the Identifier, are treated as Identifiers.
Reviewed By: abhina.sreeskantharajan
Differential Revision: https://reviews.llvm.org/D100959
2021-04-28 21:42:23 +02:00
|
|
|
|
|
|
|
// With AllowHashAtStartOfIdentifier enabled, the CommentString changed to
// "*", additional comments disabled, and hash allowed in identifiers,
// "##a#b$c" is expected to lex as one Identifier token.
TEST_F(SystemZAsmLexerTest, CheckAcceptHashAtStartOfIdentifier) {
  const StringRef AsmStr("##a#b$c");

  // Setup: MCAsmInfo attributes first, then the parser, then the lexer flag.
  MUPMAI->setAllowHashAtStartOfIdentifier(true);
  MUPMAI->setCommentString("*");
  MUPMAI->setAllowAdditionalComments(false);
  setupCallToAsmParser(AsmStr);
  auto &Lexer = Parser->getLexer();
  Lexer.setAllowHashInIdentifier(true);

  // Do an initial Lex() to get the string.
  Lexer.Lex();

  SmallVector<AsmToken::TokenKind> Expected{
      AsmToken::Identifier, AsmToken::EndOfStatement, AsmToken::Eof};
  lexAndCheckTokens(AsmStr, Expected);
}
|
|
|
|
|
|
|
|
// With only AllowHashAtStartOfIdentifier enabled (CommentString left at its
// default), "##a#b$c" is expected to lex as a comment: EndOfStatement, Eof.
TEST_F(SystemZAsmLexerTest, CheckAcceptHashAtStartOfIdentifier2) {
  const StringRef AsmStr("##a#b$c");

  // Setup: enable the attribute, create the parser, then set the lexer flag.
  MUPMAI->setAllowHashAtStartOfIdentifier(true);
  setupCallToAsmParser(AsmStr);
  auto &Lexer = Parser->getLexer();
  Lexer.setAllowHashInIdentifier(true);

  // Do an initial Lex() to get the string.
  Lexer.Lex();

  // The CommentString attribute defaults to "#", so "##a#b$c" is lexed as a
  // line comment regardless of AllowHashAtStartOfIdentifier being true.
  SmallVector<AsmToken::TokenKind> Expected{AsmToken::EndOfStatement,
                                            AsmToken::Eof};
  lexAndCheckTokens(AsmStr, Expected);
}
|
|
|
|
|
|
|
|
// With AllowHashAtStartOfIdentifier enabled and the CommentString changed to
// "*" (additional comments left enabled), "##a#b$c" is still expected to lex
// as a comment: EndOfStatement, Eof.
TEST_F(SystemZAsmLexerTest, CheckAcceptHashAtStartOfIdentifier3) {
  const StringRef AsmStr("##a#b$c");

  // Setup: MCAsmInfo attributes first, then the parser, then the lexer flag.
  MUPMAI->setAllowHashAtStartOfIdentifier(true);
  MUPMAI->setCommentString("*");
  setupCallToAsmParser(AsmStr);
  auto &Lexer = Parser->getLexer();
  Lexer.setAllowHashInIdentifier(true);

  // Do an initial Lex() to get the string.
  Lexer.Lex();

  // By default the AsmLexer treats strings starting with "#" as a line
  // comment, so "##a#b$c" is lexed as a comment regardless of
  // AllowHashAtStartOfIdentifier being true.
  SmallVector<AsmToken::TokenKind> Expected{AsmToken::EndOfStatement,
                                            AsmToken::Eof};
  lexAndCheckTokens(AsmStr, Expected);
}
|
|
|
|
|
|
|
|
// With AllowHashAtStartOfIdentifier enabled, the CommentString changed to
// "*", and additional comments disabled, "##a#b$c" is expected to lex as one
// Identifier token followed by EndOfStatement and Eof.
TEST_F(SystemZAsmLexerTest, CheckAcceptHashAtStartOfIdentifier4) {
  const StringRef AsmStr("##a#b$c");

  // Setup: MCAsmInfo attributes first, then the parser, then the lexer flag.
  MUPMAI->setAllowHashAtStartOfIdentifier(true);
  MUPMAI->setCommentString("*");
  MUPMAI->setAllowAdditionalComments(false);
  setupCallToAsmParser(AsmStr);
  auto &Lexer = Parser->getLexer();
  Lexer.setAllowHashInIdentifier(true);

  // Do an initial Lex() to get the string.
  Lexer.Lex();

  // AllowAdditionalComments is false, so only strings that start with the
  // CommentString attribute are lexed as possible comments. "##a#b$c" is
  // therefore lexed as an Identifier, because AllowHashAtStartOfIdentifier
  // is true.
  SmallVector<AsmToken::TokenKind> Expected{
      AsmToken::Identifier, AsmToken::EndOfStatement, AsmToken::Eof};
  lexAndCheckTokens(AsmStr, Expected);
}
|
2021-04-29 17:27:56 +02:00
|
|
|
|
|
|
|
// With AllowDotIsPC disabled, parsing ".-4" as an expression is expected to
// fail (parseExpression returns true) and leave a pending error.
TEST_F(SystemZAsmLexerTest, CheckRejectDotAsCurrentPC) {
  const StringRef AsmStr(".-4");

  // Setup: disable the attribute before the parser is created.
  MUPMAI->setAllowDotIsPC(false);
  setupCallToAsmParser(AsmStr);

  // Do an initial Lex() to get the string.
  Parser->getLexer().Lex();

  const MCExpr *Expr;
  const bool ParseFailed = Parser->parseExpression(Expr);
  EXPECT_TRUE(ParseFailed);
  EXPECT_TRUE(Parser->hasPendingError());
}
|
[AsmParser][SystemZ][z/OS] Implement HLASM location counter syntax ("*") for Z PC-relative instructions.
- This patch attempts to implement the location counter syntax (*) for the HLASM variant for PC-relative instructions.
- In the HLASM variant, for purely constant relocatable values, we expect a * token preceding it, with special support for " *" which is parsed as "<pc-rel-insn 0>"
- For combinations of absolute values and relocatable values, we don't expect the "*" preceding the token.
When you have a " * " what’s accepted is:
```
*<space>.*{.*} -> <pc-rel-insn> 0
*[+|-][constant-value] -> <pc-rel-insn> [+|-]constant-value
```
When you don’t have a " * " what’s accepted is:
```
brasl 1,func is allowed (MCSymbolRef type)
brasl 1,func+4 is allowed (MCBinary type)
brasl 1,4+func is allowed (MCBinary type)
brasl 1,-4+func is allowed (MCBinary type)
brasl 1,func-4 is allowed (MCBinary type)
brasl 1,*func is not allowed (* cannot be used for non-MCConstantExprs)
brasl 1,*+func is not allowed (* cannot be used for non-MCConstantExprs)
brasl 1,*+func+4 is not allowed (* cannot be used for non-MCConstantExprs)
brasl 1,*+4+func is not allowed (* cannot be used for non-MCConstantExprs)
brasl 1,*-4+8+func is not allowed (* cannot be used for non-MCConstantExprs)
```
Reviewed By: Kai
Differential Revision: https://reviews.llvm.org/D100987
2021-05-03 20:57:45 +02:00
|
|
|
|
|
|
|
// With no attribute changed, parsing "*-4" as an expression is expected to
// fail (parseExpression returns true) and leave a pending error.
TEST_F(SystemZAsmLexerTest, CheckRejectStarAsCurrentPC) {
  const StringRef AsmStr("*-4");

  // Setup: default MCAsmInfo settings are used as-is.
  setupCallToAsmParser(AsmStr);

  // Do an initial Lex() to get the string.
  Parser->getLexer().Lex();

  const MCExpr *Expr;
  const bool ParseFailed = Parser->parseExpression(Expr);
  EXPECT_TRUE(ParseFailed);
  EXPECT_TRUE(Parser->hasPendingError());
}
|
2021-05-05 16:21:27 +02:00
|
|
|
|
|
|
|
// With LexHLASMStrings enabled on the lexer, "abc 'd'" is expected to lex as
// an Identifier followed by two Error tokens, then EndOfStatement and Eof.
TEST_F(SystemZAsmLexerTest, CheckRejectCharLiterals) {
  const StringRef AsmStr("abc 'd'");

  // Setup: create the parser, then switch the lexer flag on.
  setupCallToAsmParser(AsmStr);
  auto &Lexer = Parser->getLexer();
  Lexer.setLexHLASMStrings(true);

  // Do an initial Lex() to get the string.
  Lexer.Lex();

  SmallVector<AsmToken::TokenKind> Expected{
      AsmToken::Identifier, AsmToken::Error, AsmToken::Error,
      AsmToken::EndOfStatement, AsmToken::Eof};
  lexAndCheckTokens(AsmStr, Expected);
}
|
|
|
|
|
|
|
|
// With LexHLASMStrings enabled on the lexer, "abc \"ef\"" is expected to lex
// as Identifier, Error, Identifier, Error, then EndOfStatement and Eof.
TEST_F(SystemZAsmLexerTest, CheckRejectStringLiterals) {
  const StringRef AsmStr("abc \"ef\"");

  // Setup: create the parser, then switch the lexer flag on.
  setupCallToAsmParser(AsmStr);
  auto &Lexer = Parser->getLexer();
  Lexer.setLexHLASMStrings(true);

  // Do an initial Lex() to get the string.
  Lexer.Lex();

  SmallVector<AsmToken::TokenKind> Expected{
      AsmToken::Identifier, AsmToken::Error,          AsmToken::Identifier,
      AsmToken::Error,      AsmToken::EndOfStatement, AsmToken::Eof};
  lexAndCheckTokens(AsmStr, Expected);
}
|
2021-05-26 16:36:50 +02:00
|
|
|
|
|
|
|
// With no dialect set explicitly, "ab13_$.@" is expected to be a valid
// unquoted name, and appending "#" is expected to make it invalid.
TEST_F(SystemZAsmLexerTest, CheckPrintAcceptableSymbol) {
  std::string Name("ab13_$.@");
  EXPECT_TRUE(MUPMAI->isValidUnquotedName(Name));

  // Same name with a trailing hash.
  Name.push_back('#');
  EXPECT_FALSE(MUPMAI->isValidUnquotedName(Name));
}
|
|
|
|
|
|
|
|
// With the assembler dialect set to 1, "ab13_$.@" is expected to be a valid
// unquoted name, and it is expected to stay valid after appending "#".
TEST_F(SystemZAsmLexerTest, CheckPrintAcceptableSymbol2) {
  MUPMAI->setAssemblerDialect(1);

  std::string Name("ab13_$.@");
  EXPECT_TRUE(MUPMAI->isValidUnquotedName(Name));

  // Same name with a trailing hash.
  Name.push_back('#');
  EXPECT_TRUE(MUPMAI->isValidUnquotedName(Name));
}
|
2021-06-24 18:49:38 +02:00
|
|
|
|
|
|
|
// Parses the two-line input "label\nlabel" as two expressions. The first is
// parsed with default settings and is expected to name the symbol "label".
// EmitLabelsInUpperCase is then enabled, and the second is expected to name
// the symbol "LABEL".
TEST_F(SystemZAsmLexerTest, CheckLabelCaseUpperCase2) {
  StringRef AsmStr = "label\nlabel";

  // Setup.
  setupCallToAsmParser(AsmStr);

  // Lex initially to get the string.
  Parser->getLexer().Lex();

  // Start from a known value so a failed parse cannot leave garbage behind.
  const MCExpr *Expr = nullptr;
  bool ParsePrimaryExpr = Parser->parseExpression(Expr);
  // Fatal on failure: everything below reads the parsed expression.
  ASSERT_EQ(ParsePrimaryExpr, false);

  const MCSymbolRefExpr *SymbolExpr = dyn_cast_or_null<MCSymbolRefExpr>(Expr);
  // Fatal on failure: SymbolExpr is dereferenced on the next line.
  // getSymbol() yields a reference, so its address needs no null check.
  ASSERT_NE(SymbolExpr, nullptr);
  EXPECT_EQ(SymbolExpr->getSymbol().getName(), StringRef("label"));

  // Lex the end of statement token.
  Parser->getLexer().Lex();

  // Switch the attribute on only now, between the two parses.
  MUPMAI->setEmitLabelsInUpperCase(true);

  Expr = nullptr;
  ParsePrimaryExpr = Parser->parseExpression(Expr);
  ASSERT_EQ(ParsePrimaryExpr, false);

  SymbolExpr = dyn_cast_or_null<MCSymbolRefExpr>(Expr);
  ASSERT_NE(SymbolExpr, nullptr);
  EXPECT_EQ(SymbolExpr->getSymbol().getName(), StringRef("LABEL"));
}
|
2021-03-24 15:15:15 +01:00
|
|
|
} // end anonymous namespace
|