mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
[ms] [llvm-ml] Allow arbitrary strings as integer constants
MASM interprets strings in expression contexts as integers expressed in big-endian base-256, treating each character as its ASCII representation.

This completely eliminates the need to special-case single-character strings.

Reviewed By: thakis

Differential Revision: https://reviews.llvm.org/D90788
This commit is contained in:
parent
f15e3a4579
commit
fb1b9af1ea
@ -1332,6 +1332,8 @@ bool MasmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
|
||||
/// primaryexpr ::= number
|
||||
/// primaryexpr ::= '.'
|
||||
/// primaryexpr ::= ~,+,-,'not' primaryexpr
|
||||
/// primaryexpr ::= string
|
||||
/// (a string is interpreted as a 64-bit number in big-endian base-256)
|
||||
bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
|
||||
AsmTypeInfo *TypeInfo) {
|
||||
SMLoc FirstTokenLoc = getLexer().getLoc();
|
||||
@ -1350,7 +1352,6 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
|
||||
return false;
|
||||
case AsmToken::Dollar:
|
||||
case AsmToken::At:
|
||||
case AsmToken::String:
|
||||
case AsmToken::Identifier: {
|
||||
StringRef Identifier;
|
||||
if (parseIdentifier(Identifier)) {
|
||||
@ -1517,6 +1518,20 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
|
||||
}
|
||||
return false;
|
||||
}
|
||||
case AsmToken::String: {
|
||||
// MASM strings (used as constants) are interpreted as big-endian base-256.
|
||||
SMLoc ValueLoc = getTok().getLoc();
|
||||
std::string Value;
|
||||
if (parseEscapedString(Value))
|
||||
return true;
|
||||
if (Value.size() > 8)
|
||||
return Error(ValueLoc, "literal value out of range");
|
||||
uint64_t IntValue = 0;
|
||||
for (const unsigned char CharVal : Value)
|
||||
IntValue = (IntValue << 8) | CharVal;
|
||||
Res = MCConstantExpr::create(IntValue, getContext());
|
||||
return false;
|
||||
}
|
||||
case AsmToken::Real: {
|
||||
APFloat RealVal(APFloat::IEEEdouble(), getTok().getString());
|
||||
uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
|
||||
@ -3168,28 +3183,17 @@ bool MasmParser::emitIntValue(const MCExpr *Value, unsigned Size) {
|
||||
bool MasmParser::parseScalarInitializer(unsigned Size,
|
||||
SmallVectorImpl<const MCExpr *> &Values,
|
||||
unsigned StringPadLength) {
|
||||
if (getTok().is(AsmToken::String)) {
|
||||
if (Size == 1 && getTok().is(AsmToken::String)) {
|
||||
std::string Value;
|
||||
if (parseEscapedString(Value))
|
||||
return true;
|
||||
if (Size == 1) {
|
||||
// Treat each character as an initializer.
|
||||
for (const char CharVal : Value)
|
||||
Values.push_back(MCConstantExpr::create(CharVal, getContext()));
|
||||
// Treat each character as an initializer.
|
||||
for (const unsigned char CharVal : Value)
|
||||
Values.push_back(MCConstantExpr::create(CharVal, getContext()));
|
||||
|
||||
// Pad the string with spaces to the specified length.
|
||||
for (size_t i = Value.size(); i < StringPadLength; ++i)
|
||||
Values.push_back(MCConstantExpr::create(' ', getContext()));
|
||||
} else {
|
||||
// Treat the string as an initial value in big-endian representation.
|
||||
if (Value.size() > Size)
|
||||
return Error(getTok().getLoc(), "out of range literal value");
|
||||
|
||||
uint64_t IntValue = 0;
|
||||
for (const unsigned char CharVal : Value)
|
||||
IntValue = (IntValue << 8) | CharVal;
|
||||
Values.push_back(MCConstantExpr::create(IntValue, getContext()));
|
||||
}
|
||||
// Pad the string with spaces to the specified length.
|
||||
for (size_t i = Value.size(); i < StringPadLength; ++i)
|
||||
Values.push_back(MCConstantExpr::create(' ', getContext()));
|
||||
} else {
|
||||
const MCExpr *Value;
|
||||
if (parseExpression(Value))
|
||||
|
@ -1693,20 +1693,25 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
|
||||
return Error(Tok.getLoc(), "unknown token in expression");
|
||||
}
|
||||
LLVM_FALLTHROUGH;
|
||||
case AsmToken::At:
|
||||
case AsmToken::String:
|
||||
case AsmToken::Identifier: {
|
||||
if (Parser.isParsingMasm() && Tok.is(AsmToken::String)) {
|
||||
// Single-character strings should be treated as integer constants. This
|
||||
// includes MASM escapes for quotes.
|
||||
char Quote = Tok.getString().front();
|
||||
StringRef Contents = Tok.getStringContents();
|
||||
if (Contents.size() == 1 || Contents == std::string(2, Quote)) {
|
||||
if (SM.onInteger(Contents.front(), ErrMsg))
|
||||
return Error(Tok.getLoc(), ErrMsg);
|
||||
break;
|
||||
}
|
||||
case AsmToken::String: {
|
||||
if (Parser.isParsingMasm()) {
|
||||
// MASM parsers handle strings in expressions as constants.
|
||||
SMLoc ValueLoc = Tok.getLoc();
|
||||
int64_t Res;
|
||||
const MCExpr *Val;
|
||||
if (Parser.parsePrimaryExpr(Val, End, nullptr))
|
||||
return true;
|
||||
UpdateLocLex = false;
|
||||
if (!Val->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
|
||||
return Error(ValueLoc, "expected absolute value");
|
||||
if (SM.onInteger(Res, ErrMsg))
|
||||
return Error(ValueLoc, ErrMsg);
|
||||
break;
|
||||
}
|
||||
LLVM_FALLTHROUGH;
|
||||
}
|
||||
case AsmToken::At:
|
||||
case AsmToken::Identifier: {
|
||||
SMLoc IdentLoc = Tok.getLoc();
|
||||
StringRef Identifier = Tok.getString();
|
||||
UpdateLocLex = false;
|
||||
|
@ -119,4 +119,25 @@ dq_char_test PROC
|
||||
ret
|
||||
dq_char_test ENDP
|
||||
|
||||
string_constant_test PROC
|
||||
; CHECK-LABEL: string_constant_test:
|
||||
|
||||
mov eax, 'ab'
|
||||
mov eax, "ab"
|
||||
; CHECK: mov eax, 24930
|
||||
; CHECK: mov eax, 24930
|
||||
|
||||
mov eax, "abc"
|
||||
mov eax, 'abc'
|
||||
; CHECK: mov eax, 6382179
|
||||
; CHECK: mov eax, 6382179
|
||||
|
||||
mov eax, "abc"""
|
||||
mov eax, 'abc'''
|
||||
; CHECK: mov eax, 1633837858
|
||||
; CHECK: mov eax, 1633837863
|
||||
|
||||
ret
|
||||
string_constant_test ENDP
|
||||
|
||||
end
|
||||
|
15
test/tools/llvm-ml/strings_errors.test
Normal file
15
test/tools/llvm-ml/strings_errors.test
Normal file
@ -0,0 +1,15 @@
|
||||
; RUN: not llvm-ml -filetype=asm %s 2>&1 | FileCheck %s --implicit-check-not=error:
|
||||
|
||||
.code
|
||||
|
||||
oversize_string_test PROC
|
||||
|
||||
mov rax, "abcdefghi"
|
||||
mov rax, 'abcdefghi'
|
||||
; CHECK: error: literal value out of range
|
||||
; CHECK: error: literal value out of range
|
||||
|
||||
ret
|
||||
oversize_string_test ENDP
|
||||
|
||||
end
|
Loading…
Reference in New Issue
Block a user