1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

[ms] [llvm-ml] Allow arbitrary strings as integer constants

MASM interprets strings in expression contexts as integers expressed in big-endian base-256, treating each character as its ASCII representation.

This completely eliminates the need to special-case single-character strings.

Reviewed By: thakis

Differential Revision: https://reviews.llvm.org/D90788
This commit is contained in:
Eric Astor 2020-11-06 15:18:15 -05:00
parent f15e3a4579
commit fb1b9af1ea
4 changed files with 77 additions and 32 deletions

View File

@ -1332,6 +1332,8 @@ bool MasmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
/// primaryexpr ::= number
/// primaryexpr ::= '.'
/// primaryexpr ::= ~,+,-,'not' primaryexpr
/// primaryexpr ::= string
/// (a string is interpreted as a 64-bit number in big-endian base-256)
bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
AsmTypeInfo *TypeInfo) {
SMLoc FirstTokenLoc = getLexer().getLoc();
@ -1350,7 +1352,6 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
return false;
case AsmToken::Dollar:
case AsmToken::At:
case AsmToken::String:
case AsmToken::Identifier: {
StringRef Identifier;
if (parseIdentifier(Identifier)) {
@ -1517,6 +1518,20 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
}
return false;
}
case AsmToken::String: {
// MASM strings (used as constants) are interpreted as big-endian base-256.
// Capture the location of the string token up front so that a range
// diagnostic below can point at the literal itself.
SMLoc ValueLoc = getTok().getLoc();
std::string Value;
if (parseEscapedString(Value))
return true;
// The value is accumulated in a uint64_t, one byte per character, so a
// string longer than 8 characters cannot be represented.
if (Value.size() > 8)
return Error(ValueLoc, "literal value out of range");
uint64_t IntValue = 0;
// The first character ends up in the most significant byte. Reading each
// character as unsigned char keeps bytes >= 0x80 from being sign-extended
// when they are OR-ed into the 64-bit accumulator.
for (const unsigned char CharVal : Value)
IntValue = (IntValue << 8) | CharVal;
Res = MCConstantExpr::create(IntValue, getContext());
// NOTE(review): EndLoc is not assigned on this path - confirm that callers
// do not rely on it after a string constant.
return false;
}
case AsmToken::Real: {
APFloat RealVal(APFloat::IEEEdouble(), getTok().getString());
uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
@ -3168,28 +3183,17 @@ bool MasmParser::emitIntValue(const MCExpr *Value, unsigned Size) {
bool MasmParser::parseScalarInitializer(unsigned Size,
SmallVectorImpl<const MCExpr *> &Values,
unsigned StringPadLength) {
if (getTok().is(AsmToken::String)) {
if (Size == 1 && getTok().is(AsmToken::String)) {
std::string Value;
if (parseEscapedString(Value))
return true;
if (Size == 1) {
// Treat each character as an initializer.
for (const char CharVal : Value)
Values.push_back(MCConstantExpr::create(CharVal, getContext()));
// Treat each character as an initializer.
for (const unsigned char CharVal : Value)
Values.push_back(MCConstantExpr::create(CharVal, getContext()));
// Pad the string with spaces to the specified length.
for (size_t i = Value.size(); i < StringPadLength; ++i)
Values.push_back(MCConstantExpr::create(' ', getContext()));
} else {
// Treat the string as an initial value in big-endian representation.
if (Value.size() > Size)
return Error(getTok().getLoc(), "out of range literal value");
uint64_t IntValue = 0;
for (const unsigned char CharVal : Value)
IntValue = (IntValue << 8) | CharVal;
Values.push_back(MCConstantExpr::create(IntValue, getContext()));
}
// Pad the string with spaces to the specified length.
for (size_t i = Value.size(); i < StringPadLength; ++i)
Values.push_back(MCConstantExpr::create(' ', getContext()));
} else {
const MCExpr *Value;
if (parseExpression(Value))

View File

@ -1693,20 +1693,25 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
return Error(Tok.getLoc(), "unknown token in expression");
}
LLVM_FALLTHROUGH;
case AsmToken::At:
case AsmToken::String:
case AsmToken::Identifier: {
if (Parser.isParsingMasm() && Tok.is(AsmToken::String)) {
// Single-character strings should be treated as integer constants. This
// includes MASM escapes for quotes.
char Quote = Tok.getString().front();
StringRef Contents = Tok.getStringContents();
if (Contents.size() == 1 || Contents == std::string(2, Quote)) {
if (SM.onInteger(Contents.front(), ErrMsg))
return Error(Tok.getLoc(), ErrMsg);
break;
}
case AsmToken::String: {
if (Parser.isParsingMasm()) {
// MASM parsers handle strings in expressions as constants.
SMLoc ValueLoc = Tok.getLoc();
int64_t Res;
const MCExpr *Val;
if (Parser.parsePrimaryExpr(Val, End, nullptr))
return true;
UpdateLocLex = false;
if (!Val->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
return Error(ValueLoc, "expected absolute value");
if (SM.onInteger(Res, ErrMsg))
return Error(ValueLoc, ErrMsg);
break;
}
LLVM_FALLTHROUGH;
}
case AsmToken::At:
case AsmToken::Identifier: {
SMLoc IdentLoc = Tok.getLoc();
StringRef Identifier = Tok.getString();
UpdateLocLex = false;

View File

@ -119,4 +119,25 @@ dq_char_test PROC
ret
dq_char_test ENDP
; Verifies that quoted strings used as immediate operands are assembled as
; integer constants, with the characters read as big-endian base-256 digits.
; Single-quoted and double-quoted forms are expected to behave the same.
string_constant_test PROC
; CHECK-LABEL: string_constant_test:
; Two characters: 0x61 0x62 -> 0x6162 = 24930.
mov eax, 'ab'
mov eax, "ab"
; CHECK: mov eax, 24930
; CHECK: mov eax, 24930
; Three characters: 0x61 0x62 0x63 -> 0x616263 = 6382179.
mov eax, "abc"
mov eax, 'abc'
; CHECK: mov eax, 6382179
; CHECK: mov eax, 6382179
; A doubled quote character inside a string is the MASM escape for one literal
; quote, so each string below has four characters. The trailing byte is 0x22
; for the double-quoted form and 0x27 for the single-quoted form, giving
; 0x61626322 = 1633837858 and 0x61626327 = 1633837863.
mov eax, "abc"""
mov eax, 'abc'''
; CHECK: mov eax, 1633837858
; CHECK: mov eax, 1633837863
ret
string_constant_test ENDP
end

View File

@ -0,0 +1,15 @@
; RUN: not llvm-ml -filetype=asm %s 2>&1 | FileCheck %s --implicit-check-not=error:
.code
; Negative test: each string below has nine characters, one more than fits in
; a 64-bit constant at one byte per character, so the assembler is expected to
; reject each mov with a range diagnostic (one per instruction).
oversize_string_test PROC
mov rax, "abcdefghi"
mov rax, 'abcdefghi'
; CHECK: error: literal value out of range
; CHECK: error: literal value out of range
ret
oversize_string_test ENDP
end