1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 10:42:39 +01:00

[WebAssembly] Parsing missing directives to produce valid .o

Summary:
The assembler was able to assemble and then dump back to .s, but
was failing to parse certain directives necessary for valid .o
output:
- .type directives are now recognized to distinguish function symbols
  and others.
- .size is now parsed to provide function size.
- .globaltype (introduced in https://reviews.llvm.org/D54012) is now
  recognized to ensure symbols like __stack_pointer have a proper type
  set for both .s and .o output.

Also added tests for the above.

Reviewers: sbc100, dschuff

Subscribers: jgravelle-google, aheejin, dexonsmith, kristina, llvm-commits, sunfish

Differential Revision: https://reviews.llvm.org/D53842

llvm-svn: 346047
This commit is contained in:
Wouter van Oortmerssen 2018-11-02 22:04:33 +00:00
parent b91652e1b0
commit 52e48f1225
3 changed files with 109 additions and 35 deletions

View File

@ -61,7 +61,7 @@ void MCWasmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
void MCWasmStreamer::ChangeSection(MCSection *Section,
const MCExpr *Subsection) {
MCAssembler &Asm = getAssembler();
auto *SectionWasm = static_cast<const MCSectionWasm *>(Section);
auto *SectionWasm = cast<MCSectionWasm>(Section);
const MCSymbol *Grp = SectionWasm->getGroup();
if (Grp)
Asm.registerSymbol(*Grp);

View File

@ -25,6 +25,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/TargetRegistry.h"
@ -131,14 +132,14 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
class WebAssemblyAsmParser final : public MCTargetAsmParser {
MCAsmParser &Parser;
MCAsmLexer &Lexer;
MCSymbol *LastLabel;
MCSymbolWasm *LastSymbol;
public:
WebAssemblyAsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
const MCInstrInfo &mii, const MCTargetOptions &Options)
: MCTargetAsmParser(Options, sti, mii), Parser(Parser),
Lexer(Parser.getLexer()), LastLabel(nullptr) {
setAvailableFeatures(ComputeAvailableFeatures(sti.getFeatureBits()));
WebAssemblyAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
: MCTargetAsmParser(Options, STI, MII), Parser(Parser),
Lexer(Parser.getLexer()), LastSymbol(nullptr) {
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
}
#define GET_ASSEMBLER_HEADER
@ -168,24 +169,26 @@ public:
return false;
}
MVT::SimpleValueType ParseRegType(const StringRef &RegType) {
std::pair<MVT::SimpleValueType, unsigned>
ParseRegType(const StringRef &RegType) {
// Derive type from .param .local decls, or the instruction itself.
return StringSwitch<MVT::SimpleValueType>(RegType)
.Case("i32", MVT::i32)
.Case("i64", MVT::i64)
.Case("f32", MVT::f32)
.Case("f64", MVT::f64)
.Case("i8x16", MVT::v16i8)
.Case("i16x8", MVT::v8i16)
.Case("i32x4", MVT::v4i32)
.Case("i64x2", MVT::v2i64)
.Case("f32x4", MVT::v4f32)
.Case("f64x2", MVT::v2f64)
return StringSwitch<std::pair<MVT::SimpleValueType, unsigned>>(RegType)
.Case("i32", {MVT::i32, wasm::WASM_TYPE_I32})
.Case("i64", {MVT::i64, wasm::WASM_TYPE_I64})
.Case("f32", {MVT::f32, wasm::WASM_TYPE_F32})
.Case("f64", {MVT::f64, wasm::WASM_TYPE_F64})
.Case("i8x16", {MVT::v16i8, wasm::WASM_TYPE_V128})
.Case("i16x8", {MVT::v8i16, wasm::WASM_TYPE_V128})
.Case("i32x4", {MVT::v4i32, wasm::WASM_TYPE_V128})
.Case("i64x2", {MVT::v2i64, wasm::WASM_TYPE_V128})
.Case("f32x4", {MVT::v4f32, wasm::WASM_TYPE_V128})
.Case("f64x2", {MVT::v2f64, wasm::WASM_TYPE_V128})
// arbitrarily chosen vector type to associate with "v128"
// FIXME: should these be EVTs to avoid this arbitrary hack? Do we want
// to accept more specific SIMD register types?
.Case("v128", MVT::v16i8)
.Default(MVT::INVALID_SIMPLE_VALUE_TYPE);
.Case("v128", {MVT::v16i8, wasm::WASM_TYPE_V128})
.Default({MVT::INVALID_SIMPLE_VALUE_TYPE, wasm::WASM_TYPE_NORESULT});
}
void ParseSingleInteger(bool IsNegative, OperandVector &Operands) {
@ -311,24 +314,84 @@ public:
return false;
}
void onLabelParsed(MCSymbol *Symbol) override { LastLabel = Symbol; }
void onLabelParsed(MCSymbol *Symbol) override {
LastSymbol = cast<MCSymbolWasm>(Symbol);
}
bool ParseDirective(AsmToken DirectiveID) override {
// This function has a really weird return value behavior that is different
// from all the other parsing functions:
// - return true && no tokens consumed -> don't know this directive / let
// the generic parser handle it.
// - return true && tokens consumed -> a parsing error occurred.
// - return false -> processed this directive successfully.
assert(DirectiveID.getKind() == AsmToken::Identifier);
auto &Out = getStreamer();
auto &TOut =
reinterpret_cast<WebAssemblyTargetStreamer &>(*Out.getTargetStreamer());
// TODO: we're just parsing the subset of directives we're interested in,
// and ignoring ones we don't recognise. We should ideally verify
// all directives here.
// TODO: any time we return an error, at least one token must have been
// consumed, otherwise this will not signal an error to the caller.
if (DirectiveID.getString() == ".type") {
// This could be the start of a function, check if followed by
// "label,@function"
if (!(IsNext(AsmToken::Identifier) && IsNext(AsmToken::Comma) &&
IsNext(AsmToken::At) && Lexer.is(AsmToken::Identifier)))
return Error("Expected label,@type declaration, got: ", Lexer.getTok());
if (!Lexer.is(AsmToken::Identifier))
return Error("Expected label after .type directive, got: ",
Lexer.getTok());
auto WasmSym = cast<MCSymbolWasm>(
TOut.getStreamer().getContext().getOrCreateSymbol(
Lexer.getTok().getString()));
Parser.Lex();
// Out.EmitSymbolAttribute(??, MCSA_ELF_TypeFunction);
if (!(IsNext(AsmToken::Comma) && IsNext(AsmToken::At) &&
Lexer.is(AsmToken::Identifier)))
return Error("Expected label,@type declaration, got: ", Lexer.getTok());
auto TypeName = Lexer.getTok().getString();
if (TypeName == "function")
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
else if (TypeName == "global")
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
else
return Error("Unknown WASM symbol type: ", Lexer.getTok());
Parser.Lex();
return Expect(AsmToken::EndOfStatement, "EOL");
} else if (DirectiveID.getString() == ".size") {
if (!Lexer.is(AsmToken::Identifier))
return Error("Expected label after .size directive, got: ",
Lexer.getTok());
auto WasmSym = cast<MCSymbolWasm>(
TOut.getStreamer().getContext().getOrCreateSymbol(
Lexer.getTok().getString()));
Parser.Lex();
if (!IsNext(AsmToken::Comma))
return Error("Expected `,`, got: ", Lexer.getTok());
const MCExpr *Exp;
if (Parser.parseExpression(Exp))
return Error("Cannot parse .size expression: ", Lexer.getTok());
WasmSym->setSize(Exp);
return Expect(AsmToken::EndOfStatement, "EOL");
} else if (DirectiveID.getString() == ".globaltype") {
if (!Lexer.is(AsmToken::Identifier))
return Error("Expected symbol name after .globaltype directive, got: ",
Lexer.getTok());
auto Name = Lexer.getTok().getString();
Parser.Lex();
if (!IsNext(AsmToken::Comma))
return Error("Expected `,`, got: ", Lexer.getTok());
if (!Lexer.is(AsmToken::Identifier))
return Error("Expected type in .globaltype directive, got: ",
Lexer.getTok());
auto Type = ParseRegType(Lexer.getTok().getString()).second;
if (Type == wasm::WASM_TYPE_NORESULT)
return Error("Unknown type in .globaltype directive: ",
Lexer.getTok());
Parser.Lex();
// Now set this symbol with the correct type.
auto WasmSym = cast<MCSymbolWasm>(
TOut.getStreamer().getContext().getOrCreateSymbol(Name));
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
WasmSym->setGlobalType(wasm::WasmGlobalType{uint8_t(Type), true});
// And emit the directive again.
TOut.emitGlobalType(WasmSym);
return Expect(AsmToken::EndOfStatement, "EOL");
} else if (DirectiveID.getString() == ".param" ||
DirectiveID.getString() == ".local") {
// Track the number of locals, needed for correct virtual register
@ -337,7 +400,7 @@ public:
std::vector<MVT> Params;
std::vector<MVT> Locals;
while (Lexer.is(AsmToken::Identifier)) {
auto RegType = ParseRegType(Lexer.getTok().getString());
auto RegType = ParseRegType(Lexer.getTok().getString()).first;
if (RegType == MVT::INVALID_SIMPLE_VALUE_TYPE)
return true;
if (DirectiveID.getString() == ".param") {
@ -349,15 +412,20 @@ public:
if (!IsNext(AsmToken::Comma))
break;
}
assert(LastLabel);
TOut.emitParam(LastLabel, Params);
assert(LastSymbol);
// TODO: LastSymbol isn't even used by emitParam, so could be removed.
TOut.emitParam(LastSymbol, Params);
TOut.emitLocal(Locals);
return Expect(AsmToken::EndOfStatement, "EOL");
} else {
// For now, ignore anydirective we don't recognize:
// TODO: remove.
while (Lexer.isNot(AsmToken::EndOfStatement))
Parser.Lex();
return Expect(AsmToken::EndOfStatement, "EOL");
}
return Expect(AsmToken::EndOfStatement, "EOL");
// TODO: current ELF directive parsing is broken, fix this is a followup.
//return true; // We didn't process this directive.
return false;
}
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned & /*Opcode*/,

View File

@ -1,4 +1,6 @@
# RUN: llvm-mc -triple=wasm32-unknown-unknown -mattr=+simd128,+nontrapping-fptoint,+exception-handling < %s | FileCheck %s
# this one is just here to see if it converts to .o without errors, but doesn't check any output:
# RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=obj -mattr=+simd128,+nontrapping-fptoint,+exception-handling < %s
.text
.type test0,@function
@ -56,7 +58,9 @@ test0:
#i32.trunc_s:sat/f32
get_global __stack_pointer@GLOBAL
end_function
.Lfunc_end0:
.size test0, .Lfunc_end0-test0
.globaltype __stack_pointer, i32
# CHECK: .text
# CHECK-LABEL: test0:
@ -104,3 +108,5 @@ test0:
# CHECK-NEXT: end_try
# CHECK-NEXT: get_global __stack_pointer@GLOBAL
# CHECK-NEXT: end_function
# CHECK: .globaltype __stack_pointer, i32