From a4b0bd1d7d5a278251e2b9aa2cee5ed27debc4ec Mon Sep 17 00:00:00 2001 From: Eric Astor Date: Mon, 13 Jul 2020 10:33:15 -0400 Subject: [PATCH] [ms] [llvm-ml] Improve MASM STRUCT field accessor support Summary: Adds support for several accessors: - `[<identifier>.<struct name>].<field>` - `[<identifier>.<struct name>.<field>].<subfield>` (where `field` has already-defined STRUCT type) - `[<variable>.<field>].<subfield>` (where `field` has already-defined STRUCT type) Reviewed By: thakis Differential Revision: https://reviews.llvm.org/D83344 --- include/llvm/MC/MCParser/MCAsmParser.h | 8 +++- lib/MC/MCParser/MasmParser.cpp | 55 ++++++++++++++++------- lib/Target/X86/AsmParser/X86AsmParser.cpp | 33 ++++++++------ test/tools/llvm-ml/struct.test | 50 ++++++++++++++++++++- 4 files changed, 113 insertions(+), 33 deletions(-) diff --git a/include/llvm/MC/MCParser/MCAsmParser.h b/include/llvm/MC/MCParser/MCAsmParser.h index 20400897595..a68066e0f50 100644 --- a/include/llvm/MC/MCParser/MCAsmParser.h +++ b/include/llvm/MC/MCParser/MCAsmParser.h @@ -170,8 +170,12 @@ public: virtual bool isParsingMasm() const { return false; } - virtual bool LookUpFieldOffset(StringRef Base, StringRef Member, - unsigned &Offset) { + virtual bool lookUpField(StringRef Name, StringRef &Type, + unsigned &Offset) const { + return true; + } + virtual bool lookUpField(StringRef Base, StringRef Member, StringRef &Type, + unsigned &Offset) const { return true; } diff --git a/lib/MC/MCParser/MasmParser.cpp b/lib/MC/MCParser/MasmParser.cpp index 3dbd00aae47..d7d0508cabf 100644 --- a/lib/MC/MCParser/MasmParser.cpp +++ b/lib/MC/MCParser/MasmParser.cpp @@ -490,8 +490,10 @@ public: bool isParsingMasm() const override { return true; } - bool LookUpFieldOffset(StringRef Base, StringRef Member, - unsigned &Offset) override; + bool lookUpField(StringRef Name, StringRef &Type, + unsigned &Offset) const override; + bool lookUpField(StringRef Base, StringRef Member, StringRef &Type, + unsigned &Offset) const override; bool parseMSInlineAsm(void *AsmLoc, std::string &AsmString, unsigned &NumOutputs, unsigned 
&NumInputs, @@ -561,8 +563,8 @@ private: } static void DiagHandler(const SMDiagnostic &Diag, void *Context); - bool LookUpFieldOffset(const StructInfo &Structure, StringRef Member, - unsigned &Offset); + bool lookUpField(const StructInfo &Structure, StringRef Member, + StringRef &Type, unsigned &Offset) const; /// Should we emit DWARF describing this assembler source? (Returns false if /// the source has .file directives, which means we don't want to generate @@ -1397,12 +1399,13 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { } // Find the field offset if used. + StringRef Type; unsigned Offset = 0; Split = SymbolName.split('.'); if (!Split.second.empty()) { SymbolName = Split.first; if (Structs.count(SymbolName.lower()) && - !LookUpFieldOffset(SymbolName, Split.second, Offset)) { + !lookUpField(SymbolName, Split.second, Type, Offset)) { // This is actually a reference to a field offset. Res = MCConstantExpr::create(Offset, getContext()); return false; @@ -1410,10 +1413,10 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { auto TypeIt = KnownType.find(SymbolName); if (TypeIt == KnownType.end() || - LookUpFieldOffset(*TypeIt->second, Split.second, Offset)) { + lookUpField(*TypeIt->second, Split.second, Type, Offset)) { std::pair BaseMember = Split.second.split('.'); StringRef Base = BaseMember.first, Member = BaseMember.second; - LookUpFieldOffset(Base, Member, Offset); + lookUpField(Base, Member, Type, Offset); } } @@ -6519,34 +6522,56 @@ static int rewritesSort(const AsmRewrite *AsmRewriteA, llvm_unreachable("Unstable rewrite sort."); } -bool MasmParser::LookUpFieldOffset(StringRef Base, StringRef Member, - unsigned &Offset) { +bool MasmParser::lookUpField(StringRef Name, StringRef &Type, + unsigned &Offset) const { + const std::pair BaseMember = Name.split('.'); + const StringRef Base = BaseMember.first, Member = BaseMember.second; + return lookUpField(Base, Member, Type, Offset); +} + +bool 
MasmParser::lookUpField(StringRef Base, StringRef Member, StringRef &Type, + unsigned &Offset) const { if (Base.empty()) return true; + unsigned BaseOffset = 0; + if (Base.contains('.') && !lookUpField(Base, Type, BaseOffset)) + Base = Type; + auto TypeIt = KnownType.find(Base); if (TypeIt != KnownType.end()) - return LookUpFieldOffset(*TypeIt->second, Member, Offset); + return lookUpField(*TypeIt->second, Member, Type, Offset); auto StructIt = Structs.find(Base.lower()); if (StructIt != Structs.end()) - return LookUpFieldOffset(StructIt->second, Member, Offset); + return lookUpField(StructIt->second, Member, Type, Offset); return true; } -bool MasmParser::LookUpFieldOffset(const StructInfo &Structure, - StringRef Member, unsigned &Offset) { +bool MasmParser::lookUpField(const StructInfo &Structure, StringRef Member, + StringRef &Type, unsigned &Offset) const { + if (Member.empty()) { + Type = Structure.Name; + return false; + } + std::pair Split = Member.split('.'); const StringRef FieldName = Split.first, FieldMember = Split.second; + auto StructIt = Structs.find(FieldName.lower()); + if (StructIt != Structs.end()) + return lookUpField(StructIt->second, FieldMember, Type, Offset); + auto FieldIt = Structure.FieldsByName.find(FieldName.lower()); if (FieldIt == Structure.FieldsByName.end()) return true; const FieldInfo &Field = Structure.Fields[FieldIt->second]; if (FieldMember.empty()) { - Offset = Field.Offset; + Offset += Field.Offset; + if (Field.Contents.FT == FT_STRUCT) + Type = Field.Contents.StructInfo.Structure.Name; return false; } @@ -6554,7 +6579,7 @@ bool MasmParser::LookUpFieldOffset(const StructInfo &Structure, return true; const StructFieldInfo &StructInfo = Field.Contents.StructInfo; - bool Result = LookUpFieldOffset(StructInfo.Structure, FieldMember, Offset); + bool Result = lookUpField(StructInfo.Structure, FieldMember, Type, Offset); if (Result) return true; diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp 
b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 0573d4eec05..fe09b2952f0 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -358,6 +358,7 @@ private: bool MemExpr; bool OffsetOperator; SMLoc OffsetOperatorLoc; + StringRef CurType; bool setSymRef(const MCExpr *Val, StringRef ID, StringRef &ErrMsg) { if (Sym) { @@ -385,6 +386,7 @@ private: unsigned getScale() { return Scale; } const MCExpr *getSym() { return Sym; } StringRef getSymName() { return SymName; } + StringRef getType() { return CurType; } int64_t getImm() { return Imm + IC.execute(); } bool isValidEndState() { return State == IES_RBRAC || State == IES_INTEGER; @@ -846,6 +848,7 @@ private: } return false; } + void setType(StringRef Type) { CurType = Type; } }; bool Error(SMLoc L, const Twine &Msg, SMRange Range = None, @@ -1641,27 +1644,25 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { break; } if (Parser.isParsingMasm()) { - const std::pair RegField = + const std::pair IDField = Tok.getString().split('.'); - const StringRef RegName = RegField.first, Field = RegField.second; - SMLoc RegEndLoc = - SMLoc::getFromPointer(RegName.data() + RegName.size()); + const StringRef ID = IDField.first, Field = IDField.second; + SMLoc IDEndLoc = SMLoc::getFromPointer(ID.data() + ID.size()); if (!Field.empty() && - !MatchRegisterByName(Reg, RegName, IdentLoc, RegEndLoc)) { + !MatchRegisterByName(Reg, ID, IdentLoc, IDEndLoc)) { if (SM.onRegister(Reg, ErrMsg)) return Error(IdentLoc, ErrMsg); + StringRef Type; + unsigned Offset = 0; SMLoc FieldStartLoc = SMLoc::getFromPointer(Field.data()); - const std::pair BaseMember = Field.split('.'); - const StringRef Base = BaseMember.first, Member = BaseMember.second; - - unsigned Offset; - if (Parser.LookUpFieldOffset(Base, Member, Offset)) + if (Parser.lookUpField(Field, Type, Offset)) return Error(FieldStartLoc, "unknown offset"); else if (SM.onPlus(ErrMsg)) return Error(getTok().getLoc(), 
ErrMsg); else if (SM.onInteger(Offset, ErrMsg)) return Error(IdentLoc, ErrMsg); + SM.setType(Type); End = consumeToken(); break; @@ -1915,9 +1916,11 @@ X86AsmParser::ParseRoundingModeOp(SMLoc Start) { } /// Parse the '.' operator. -bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End) { +bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, + SMLoc &End) { const AsmToken &Tok = getTok(); - unsigned Offset; + StringRef Type; + unsigned Offset = 0; // Drop the optional '.'. StringRef DotDispStr = Tok.getString(); @@ -1933,8 +1936,9 @@ bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End) Tok.is(AsmToken::Identifier)) { const std::pair BaseMember = DotDispStr.split('.'); const StringRef Base = BaseMember.first, Member = BaseMember.second; - if (getParser().LookUpFieldOffset(SM.getSymName(), DotDispStr, Offset) && - getParser().LookUpFieldOffset(Base, Member, Offset) && + if (getParser().lookUpField(SM.getType(), DotDispStr, Type, Offset) && + getParser().lookUpField(SM.getSymName(), DotDispStr, Type, Offset) && + getParser().lookUpField(DotDispStr, Type, Offset) && (!SemaCallback || SemaCallback->LookupInlineAsmField(Base, Member, Offset))) return Error(Tok.getLoc(), "Unable to lookup field reference!"); @@ -1947,6 +1951,7 @@ bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End) while (Tok.getLoc().getPointer() < DotExprEndLoc) Lex(); SM.addImm(Offset); + SM.setType(Type); return false; } diff --git a/test/tools/llvm-ml/struct.test b/test/tools/llvm-ml/struct.test index 0e60d244945..ecd89a14037 100644 --- a/test/tools/llvm-ml/struct.test +++ b/test/tools/llvm-ml/struct.test @@ -85,13 +85,11 @@ t3: mov eax, t2.f.h mov eax, [t2].f.h mov eax, [t2.f.h] -mov eax, t2.FOOBAR.f.h ; CHECK: t3: ; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] ; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] ; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] -; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] t4: mov 
eax, j.FOOBAR.f.h @@ -101,4 +99,52 @@ mov eax, j.baz.b ; CHECK-NEXT: mov eax, dword ptr [rip + j+12] ; CHECK-NEXT: mov eax, dword ptr [rip + j+1] +t5: +mov eax, [ebx].FOOBAR.f.h +mov eax, [ebx.FOOBAR].f.h +mov eax, [ebx.FOOBAR.f.h] + +; CHECK: t5: +; CHECK-NEXT: mov eax, dword ptr [ebx + 12] +; CHECK-NEXT: mov eax, dword ptr [ebx + 12] +; CHECK-NEXT: mov eax, dword ptr [ebx + 12] + +t6: +mov eax, t2.FOOBAR.f.h +mov eax, [t2].FOOBAR.f.h +mov eax, [t2.FOOBAR].f.h +mov eax, [t2.FOOBAR.f.h] + +; CHECK: t6: +; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] + +t7: +mov eax, [ebx].FOOBAR.e.b +mov eax, [ebx.FOOBAR].e.b +mov eax, [ebx.FOOBAR.e].b +mov eax, [ebx.FOOBAR.e.b] + +; CHECK: t7: +; CHECK-NEXT: mov eax, dword ptr [ebx + 9] +; CHECK-NEXT: mov eax, dword ptr [ebx + 9] +; CHECK-NEXT: mov eax, dword ptr [ebx + 9] +; CHECK-NEXT: mov eax, dword ptr [ebx + 9] + +t8: +mov eax, t2.FOOBAR.e.b +mov eax, [t2].FOOBAR.e.b +mov eax, [t2.FOOBAR].e.b +mov eax, [t2.FOOBAR.e].b +mov eax, [t2.FOOBAR.e.b] + +; CHECK: t8: +; CHECK-NEXT: mov eax, dword ptr [rip + t2+9] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+9] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+9] +; CHECK-NEXT: mov eax, dword ptr [rip + (t2+8)+1] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+9] + END