2017-02-10 02:33:54 +01:00
|
|
|
//===- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface ------*- C++ -*-===//
|
2009-07-20 22:01:54 +02:00
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2013-01-10 01:45:19 +01:00
|
|
|
#ifndef LLVM_MC_MCPARSER_MCASMLEXER_H
|
|
|
|
#define LLVM_MC_MCPARSER_MCASMLEXER_H
|
2009-07-20 22:01:54 +02:00
|
|
|
|
2014-02-01 17:20:54 +01:00
|
|
|
#include "llvm/ADT/APInt.h"
|
2016-04-16 09:51:28 +02:00
|
|
|
#include "llvm/ADT/ArrayRef.h"
|
|
|
|
#include "llvm/ADT/SmallVector.h"
|
2009-07-28 19:58:44 +02:00
|
|
|
#include "llvm/ADT/StringRef.h"
|
2010-01-20 23:18:24 +01:00
|
|
|
#include "llvm/Support/SMLoc.h"
|
2017-02-10 02:33:54 +01:00
|
|
|
#include <algorithm>
|
|
|
|
#include <cassert>
|
|
|
|
#include <cstddef>
|
|
|
|
#include <cstdint>
|
|
|
|
#include <string>
|
2009-07-28 19:58:44 +02:00
|
|
|
|
2009-07-20 22:01:54 +02:00
|
|
|
namespace llvm {
|
|
|
|
|
2014-11-11 05:49:14 +01:00
|
|
|
/// Target independent representation for an assembler token.
|
2010-01-14 22:32:45 +01:00
|
|
|
class AsmToken {
|
|
|
|
public:
|
2009-07-28 19:58:44 +02:00
|
|
|
enum TokenKind {
|
|
|
|
// Markers
|
|
|
|
Eof, Error,
|
|
|
|
|
|
|
|
// String values.
|
|
|
|
Identifier,
|
|
|
|
String,
|
2011-02-11 02:21:00 +01:00
|
|
|
|
2009-07-28 19:58:44 +02:00
|
|
|
// Integer values.
|
|
|
|
Integer,
|
2014-02-01 17:20:54 +01:00
|
|
|
BigNum, // larger than 64 bits
|
2011-02-11 02:21:00 +01:00
|
|
|
|
2010-09-24 03:59:31 +02:00
|
|
|
// Real values.
|
|
|
|
Real,
|
2011-02-11 02:21:00 +01:00
|
|
|
|
2016-06-17 18:06:17 +02:00
|
|
|
// Comments
|
|
|
|
Comment,
|
|
|
|
HashDirective,
|
2009-07-28 19:58:44 +02:00
|
|
|
// No-value.
|
|
|
|
EndOfStatement,
|
|
|
|
Colon,
|
2012-09-19 22:36:12 +02:00
|
|
|
Space,
|
2009-07-28 19:58:44 +02:00
|
|
|
Plus, Minus, Tilde,
|
2016-06-17 18:06:17 +02:00
|
|
|
Slash, // '/'
|
2011-06-05 04:43:45 +02:00
|
|
|
BackSlash, // '\'
|
2009-09-05 00:40:31 +02:00
|
|
|
LParen, RParen, LBrac, RBrac, LCurly, RCurly,
|
2010-04-14 06:40:28 +02:00
|
|
|
Star, Dot, Comma, Dollar, Equal, EqualEqual,
|
2011-02-11 02:21:00 +01:00
|
|
|
|
|
|
|
Pipe, PipePipe, Caret,
|
2009-09-04 23:45:34 +02:00
|
|
|
Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
|
2009-07-28 19:58:44 +02:00
|
|
|
Less, LessEqual, LessLess, LessGreater,
|
2016-08-08 13:50:25 +02:00
|
|
|
Greater, GreaterEqual, GreaterGreater, At,
|
|
|
|
|
|
|
|
// MIPS unary expression operators such as %neg.
|
|
|
|
PercentCall16, PercentCall_Hi, PercentCall_Lo, PercentDtprel_Hi,
|
|
|
|
PercentDtprel_Lo, PercentGot, PercentGot_Disp, PercentGot_Hi, PercentGot_Lo,
|
|
|
|
PercentGot_Ofst, PercentGot_Page, PercentGottprel, PercentGp_Rel, PercentHi,
|
|
|
|
PercentHigher, PercentHighest, PercentLo, PercentNeg, PercentPcrel_Hi,
|
|
|
|
PercentPcrel_Lo, PercentTlsgd, PercentTlsldm, PercentTprel_Hi,
|
|
|
|
PercentTprel_Lo
|
2009-07-28 19:58:44 +02:00
|
|
|
};
|
|
|
|
|
2012-04-16 00:00:22 +02:00
|
|
|
private:
|
2009-07-28 19:58:44 +02:00
|
|
|
TokenKind Kind;
|
|
|
|
|
|
|
|
/// A reference to the entire token contents; this is always a pointer into
|
|
|
|
/// a memory buffer owned by the source manager.
|
|
|
|
StringRef Str;
|
|
|
|
|
2014-02-01 17:20:54 +01:00
|
|
|
APInt IntVal;
|
2009-07-28 19:58:44 +02:00
|
|
|
|
|
|
|
public:
|
2017-02-10 02:33:54 +01:00
|
|
|
AsmToken() = default;
|
2015-03-16 19:06:57 +01:00
|
|
|
AsmToken(TokenKind Kind, StringRef Str, APInt IntVal)
|
2016-05-27 16:27:24 +02:00
|
|
|
: Kind(Kind), Str(Str), IntVal(std::move(IntVal)) {}
|
2015-03-16 19:06:57 +01:00
|
|
|
AsmToken(TokenKind Kind, StringRef Str, int64_t IntVal = 0)
|
|
|
|
: Kind(Kind), Str(Str), IntVal(64, IntVal, true) {}
|
2009-07-28 19:58:44 +02:00
|
|
|
|
|
|
|
TokenKind getKind() const { return Kind; }
|
|
|
|
bool is(TokenKind K) const { return Kind == K; }
|
|
|
|
bool isNot(TokenKind K) const { return Kind != K; }
|
|
|
|
|
|
|
|
SMLoc getLoc() const;
|
2011-10-16 14:10:27 +02:00
|
|
|
SMLoc getEndLoc() const;
|
2014-10-03 17:37:37 +02:00
|
|
|
SMRange getLocRange() const;
|
2009-07-28 19:58:44 +02:00
|
|
|
|
2014-11-11 05:49:14 +01:00
|
|
|
/// Get the contents of a string token (without quotes).
|
2011-02-11 02:21:00 +01:00
|
|
|
StringRef getStringContents() const {
|
2009-07-31 23:55:09 +02:00
|
|
|
assert(Kind == String && "This token isn't a string!");
|
|
|
|
return Str.slice(1, Str.size() - 1);
|
|
|
|
}
|
|
|
|
|
2014-11-11 05:49:14 +01:00
|
|
|
/// Get the identifier string for the current token, which should be an
|
|
|
|
/// identifier or a string. This gets the portion of the string which should
|
|
|
|
/// be used as the identifier, e.g., it does not include the quotes on
|
|
|
|
/// strings.
|
2009-07-31 23:55:09 +02:00
|
|
|
StringRef getIdentifier() const {
|
|
|
|
if (Kind == Identifier)
|
|
|
|
return getString();
|
|
|
|
return getStringContents();
|
|
|
|
}
|
|
|
|
|
2014-11-11 05:49:14 +01:00
|
|
|
/// Get the string for the current token, this includes all characters (for
|
|
|
|
/// example, the quotes on strings) in the token.
|
2009-07-28 19:58:44 +02:00
|
|
|
///
|
|
|
|
/// The returned StringRef points into the source manager's memory buffer, and
|
|
|
|
/// is safe to store across calls to Lex().
|
|
|
|
StringRef getString() const { return Str; }
|
|
|
|
|
|
|
|
// FIXME: Don't compute this in advance, it makes every token larger, and is
|
|
|
|
// also not generally what we want (it is nicer for recovery etc. to lex 123br
|
|
|
|
// as a single token, then diagnose as an invalid number).
|
2011-02-11 02:21:00 +01:00
|
|
|
int64_t getIntVal() const {
|
2009-07-31 23:55:09 +02:00
|
|
|
assert(Kind == Integer && "This token isn't an integer!");
|
2014-02-01 17:20:54 +01:00
|
|
|
return IntVal.getZExtValue();
|
|
|
|
}
|
|
|
|
|
|
|
|
APInt getAPIntVal() const {
|
|
|
|
assert((Kind == Integer || Kind == BigNum) &&
|
|
|
|
"This token isn't an integer!");
|
2011-02-11 02:21:00 +01:00
|
|
|
return IntVal;
|
2009-07-28 19:58:44 +02:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2016-12-08 11:31:21 +01:00
|
|
|
/// A callback class which is notified of each comment in an assembly file as
|
|
|
|
/// it is lexed.
|
|
|
|
class AsmCommentConsumer {
|
|
|
|
public:
|
2017-02-10 02:33:54 +01:00
|
|
|
virtual ~AsmCommentConsumer() = default;
|
2016-12-08 11:31:21 +01:00
|
|
|
|
|
|
|
/// Callback function for when a comment is lexed. Loc is the start of the
|
|
|
|
/// comment text (excluding the comment-start marker). CommentText is the text
|
|
|
|
/// of the comment, excluding the comment start and end markers, and the
|
|
|
|
/// newline for single-line comments.
|
|
|
|
virtual void HandleComment(SMLoc Loc, StringRef CommentText) = 0;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2014-11-11 05:49:14 +01:00
|
|
|
/// Generic assembler lexer interface, for use by target specific assembly
|
|
|
|
/// lexers.
|
2009-07-20 22:01:54 +02:00
|
|
|
class MCAsmLexer {
|
2009-07-28 19:58:44 +02:00
|
|
|
/// The current token, stored in the base class for faster access.
|
2015-11-09 00:48:23 +01:00
|
|
|
SmallVector<AsmToken, 1> CurTok;
|
2011-02-11 02:21:00 +01:00
|
|
|
|
2010-01-20 23:18:24 +01:00
|
|
|
/// The location and description of the current error
|
|
|
|
SMLoc ErrLoc;
|
|
|
|
std::string Err;
|
2009-07-28 19:58:44 +02:00
|
|
|
|
2009-07-20 22:01:54 +02:00
|
|
|
protected: // Can only create subclasses.
|
2017-02-10 02:33:54 +01:00
|
|
|
const char *TokStart = nullptr;
|
|
|
|
bool SkipSpace = true;
|
2014-01-15 23:40:02 +01:00
|
|
|
bool AllowAtInIdentifier;
|
2017-02-10 02:33:54 +01:00
|
|
|
bool IsAtStartOfStatement = true;
|
|
|
|
AsmCommentConsumer *CommentConsumer = nullptr;
|
2010-07-12 19:10:00 +02:00
|
|
|
|
2009-07-20 22:01:54 +02:00
|
|
|
MCAsmLexer();
|
2009-07-28 19:58:44 +02:00
|
|
|
|
|
|
|
virtual AsmToken LexToken() = 0;
|
2011-02-11 02:21:00 +01:00
|
|
|
|
2015-09-21 01:35:59 +02:00
|
|
|
void SetError(SMLoc errLoc, const std::string &err) {
|
2010-01-20 23:18:24 +01:00
|
|
|
ErrLoc = errLoc;
|
|
|
|
Err = err;
|
|
|
|
}
|
2011-02-11 02:21:00 +01:00
|
|
|
|
2009-07-20 22:01:54 +02:00
|
|
|
public:
|
2017-02-10 02:33:54 +01:00
|
|
|
MCAsmLexer(const MCAsmLexer &) = delete;
|
|
|
|
MCAsmLexer &operator=(const MCAsmLexer &) = delete;
|
2009-07-20 22:01:54 +02:00
|
|
|
virtual ~MCAsmLexer();
|
2009-07-28 19:58:44 +02:00
|
|
|
|
2014-11-11 05:49:14 +01:00
|
|
|
/// Consume the next token from the input stream and return it.
|
2009-07-28 19:58:44 +02:00
|
|
|
///
|
|
|
|
/// The lexer will continuosly return the end-of-file token once the end of
|
|
|
|
/// the main input file has been reached.
|
|
|
|
const AsmToken &Lex() {
|
2015-11-09 00:48:23 +01:00
|
|
|
assert(!CurTok.empty());
|
2016-09-16 20:30:20 +02:00
|
|
|
// Mark if we parsing out a EndOfStatement.
|
|
|
|
IsAtStartOfStatement = CurTok.front().getKind() == AsmToken::EndOfStatement;
|
2015-11-09 00:48:23 +01:00
|
|
|
CurTok.erase(CurTok.begin());
|
2016-06-17 18:06:17 +02:00
|
|
|
// LexToken may generate multiple tokens via UnLex but will always return
|
|
|
|
// the first one. Place returned value at head of CurTok vector.
|
|
|
|
if (CurTok.empty()) {
|
|
|
|
AsmToken T = LexToken();
|
|
|
|
CurTok.insert(CurTok.begin(), T);
|
|
|
|
}
|
2015-11-09 00:48:23 +01:00
|
|
|
return CurTok.front();
|
|
|
|
}
|
|
|
|
|
|
|
|
void UnLex(AsmToken const &Token) {
|
2016-09-16 20:30:20 +02:00
|
|
|
IsAtStartOfStatement = false;
|
2015-11-09 00:48:23 +01:00
|
|
|
CurTok.insert(CurTok.begin(), Token);
|
2009-07-28 19:58:44 +02:00
|
|
|
}
|
|
|
|
|
2016-09-16 20:30:20 +02:00
|
|
|
bool isAtStartOfStatement() { return IsAtStartOfStatement; }
|
|
|
|
|
2010-07-12 22:32:33 +02:00
|
|
|
virtual StringRef LexUntilEndOfStatement() = 0;
|
|
|
|
|
2014-11-11 05:49:14 +01:00
|
|
|
/// Get the current source location.
|
2010-07-12 19:10:00 +02:00
|
|
|
SMLoc getLoc() const;
|
|
|
|
|
2014-11-11 05:49:14 +01:00
|
|
|
/// Get the current (last) lexed token.
|
2014-11-11 06:11:47 +01:00
|
|
|
const AsmToken &getTok() const {
|
2015-11-09 00:48:23 +01:00
|
|
|
return CurTok[0];
|
2009-07-28 19:58:44 +02:00
|
|
|
}
|
2011-02-11 02:21:00 +01:00
|
|
|
|
2014-11-11 05:49:14 +01:00
|
|
|
/// Look ahead at the next token to be lexed.
|
2015-08-17 16:35:25 +02:00
|
|
|
const AsmToken peekTok(bool ShouldSkipSpace = true) {
|
|
|
|
AsmToken Tok;
|
|
|
|
|
|
|
|
MutableArrayRef<AsmToken> Buf(Tok);
|
|
|
|
size_t ReadCount = peekTokens(Buf, ShouldSkipSpace);
|
|
|
|
|
|
|
|
assert(ReadCount == 1);
|
|
|
|
(void)ReadCount;
|
|
|
|
|
|
|
|
return Tok;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Look ahead an arbitrary number of tokens.
|
|
|
|
virtual size_t peekTokens(MutableArrayRef<AsmToken> Buf,
|
|
|
|
bool ShouldSkipSpace = true) = 0;
|
2014-02-10 00:29:24 +01:00
|
|
|
|
2014-11-11 05:49:14 +01:00
|
|
|
/// Get the current error location
|
2015-09-21 01:35:59 +02:00
|
|
|
SMLoc getErrLoc() {
|
2010-01-20 23:18:24 +01:00
|
|
|
return ErrLoc;
|
|
|
|
}
|
2011-02-11 02:21:00 +01:00
|
|
|
|
2014-11-11 05:49:14 +01:00
|
|
|
/// Get the current error string
|
2010-01-20 23:18:24 +01:00
|
|
|
const std::string &getErr() {
|
|
|
|
return Err;
|
|
|
|
}
|
2009-07-28 19:58:44 +02:00
|
|
|
|
2014-11-11 05:49:14 +01:00
|
|
|
/// Get the kind of current token.
|
2015-10-12 19:57:02 +02:00
|
|
|
AsmToken::TokenKind getKind() const { return getTok().getKind(); }
|
2009-07-28 19:58:44 +02:00
|
|
|
|
2014-11-11 05:49:14 +01:00
|
|
|
/// Check if the current token has kind \p K.
|
2015-10-12 19:57:02 +02:00
|
|
|
bool is(AsmToken::TokenKind K) const { return getTok().is(K); }
|
2009-07-28 19:58:44 +02:00
|
|
|
|
2014-11-11 05:49:14 +01:00
|
|
|
/// Check if the current token has kind \p K.
|
2015-10-12 19:57:02 +02:00
|
|
|
bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); }
|
2012-09-19 22:36:12 +02:00
|
|
|
|
2014-11-11 05:49:14 +01:00
|
|
|
/// Set whether spaces should be ignored by the lexer
|
2012-09-19 22:36:12 +02:00
|
|
|
void setSkipSpace(bool val) { SkipSpace = val; }
|
2014-01-15 23:40:02 +01:00
|
|
|
|
|
|
|
bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
|
|
|
|
void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
|
2016-12-08 11:31:21 +01:00
|
|
|
|
|
|
|
void setCommentConsumer(AsmCommentConsumer *CommentConsumer) {
|
|
|
|
this->CommentConsumer = CommentConsumer;
|
|
|
|
}
|
2009-07-20 22:01:54 +02:00
|
|
|
};
|
|
|
|
|
2017-02-10 02:33:54 +01:00
|
|
|
} // end namespace llvm
|
2009-07-20 22:01:54 +02:00
|
|
|
|
2017-02-10 02:33:54 +01:00
|
|
|
#endif // LLVM_MC_MCPARSER_MCASMLEXER_H
|