From 2bb8df3b5d36bc79f4763bea52c5b0d1e98f0a69 Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Sat, 22 Aug 2009 07:22:36 +0000 Subject: [PATCH] llvm-mc: Clean up some handling of symbol/section association to be more correct (external was really undefined and there wasn't an explicit representation for absolute symbols). - This still needs some cleanup to how the absolute "pseudo" section is dealt with, but I haven't figured out the nicest approach yet. llvm-svn: 79733 --- include/llvm/MC/MCSymbol.h | 61 +++++++++++++++++------ include/llvm/MC/MCValue.h | 7 +-- lib/MC/MCAsmStreamer.cpp | 11 ++--- lib/MC/MCMachOStreamer.cpp | 11 ++--- lib/MC/MCSymbol.cpp | 4 ++ test/Scripts/macho-dump | 96 ++++++++++++++++++++++++++++++++----- tools/llvm-mc/AsmParser.cpp | 26 ++-------- 7 files changed, 153 insertions(+), 63 deletions(-) diff --git a/include/llvm/MC/MCSymbol.h b/include/llvm/MC/MCSymbol.h index a5176bc24ff..2b570a7ad3c 100644 --- a/include/llvm/MC/MCSymbol.h +++ b/include/llvm/MC/MCSymbol.h @@ -16,6 +16,7 @@ #include #include "llvm/ADT/StringRef.h" +#include "llvm/Support/DataTypes.h" namespace llvm { class MCSection; @@ -30,39 +31,69 @@ namespace llvm { /// it is a reference to an external entity, it has a null section. /// class MCSymbol { + // Special sentinal value for the absolute pseudo section. + // + // FIXME: Use a PointerInt wrapper for this? + static const MCSection *AbsolutePseudoSection; + /// Name - The name of the symbol. std::string Name; - /// Section - The section the symbol is defined in, or null if the symbol - /// has not been defined in the associated translation unit. + + /// Section - The section the symbol is defined in. This is null for + /// undefined symbols, and the special AbsolutePseudoSection value for + /// absolute symbols. const MCSection *Section; /// IsTemporary - True if this is an assembler temporary label, which /// typically does not survive in the .o file's symbol table. Usually /// "Lfoo" or ".foo". unsigned IsTemporary : 1; - - /// IsExternal - True if this symbol has been implicitly defined as an - /// external, for example by using it in an expression without ever emitting - /// it as a label. The @var Section for an external symbol is always null. - unsigned IsExternal : 1; private: // MCContext creates and uniques these. friend class MCContext; MCSymbol(const StringRef &_Name, bool _IsTemporary) - : Name(_Name), Section(0), IsTemporary(_IsTemporary), IsExternal(false) {} + : Name(_Name), Section(0), + IsTemporary(_IsTemporary) {} MCSymbol(const MCSymbol&); // DO NOT IMPLEMENT void operator=(const MCSymbol&); // DO NOT IMPLEMENT public: - - const MCSection *getSection() const { return Section; } - void setSection(const MCSection *S) { Section = S; } - - bool isExternal() const { return IsExternal; } - void setExternal(bool Value) { IsExternal = Value; } - + /// getName - Get the symbol name. const std::string &getName() const { return Name; } + /// @name Symbol Location Functions + /// @{ + + /// isUndefined - Check if this symbol undefined (i.e., implicitly defined). + bool isUndefined() const { + return Section == 0; + } + + /// isAbsolute - Check if this this is an absolute symbol. + bool isAbsolute() const { + return Section == AbsolutePseudoSection; + } + + /// getSection - Get the section associated with a defined, non-absolute + /// symbol. + const MCSection &getSection() const { + assert(!isUndefined() && !isAbsolute() && "Invalid accessor!"); + return *Section; + } + + /// setSection - Mark the symbol as defined in the section \arg S. + void setSection(const MCSection &S) { Section = &S; } + + /// setUndefined - Mark the symbol as undefined. + void setUndefined() { + Section = 0; + } + + /// setAbsolute - Mark the symbol as absolute. + void setAbsolute() { Section = AbsolutePseudoSection; } + + /// @} + /// print - Print the value to the stream \arg OS. void print(raw_ostream &OS) const; diff --git a/include/llvm/MC/MCValue.h b/include/llvm/MC/MCValue.h index ec323fe1b3f..af56eedece0 100644 --- a/include/llvm/MC/MCValue.h +++ b/include/llvm/MC/MCValue.h @@ -49,9 +49,10 @@ public: /// /// @result - The value's associated section, or null for external or constant /// values. - const MCSection *getAssociatedSection() const { - return SymA ? SymA->getSection() : 0; - } + // + // FIXME: Switch to a tagged section, so this can return the tagged section + // value. + const MCSection *getAssociatedSection() const; /// print - Print the value to the stream \arg OS. void print(raw_ostream &OS) const; diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 3dfd8e94748..655137d580c 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -107,14 +107,11 @@ void MCAsmStreamer::SwitchSection(const MCSection *Section) { } void MCAsmStreamer::EmitLabel(MCSymbol *Symbol) { - assert(Symbol->getSection() == 0 && "Cannot emit a symbol twice!"); + assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); assert(CurSection && "Cannot emit before setting section!"); - assert(!getContext().GetSymbolValue(Symbol) && - "Cannot emit symbol which was directly assigned to!"); OS << Symbol << ":\n"; - Symbol->setSection(CurSection); - Symbol->setExternal(false); + Symbol->setSection(*CurSection); } void MCAsmStreamer::EmitAssemblerFlag(AssemblerFlag Flag) { @@ -127,7 +124,9 @@ void MCAsmStreamer::EmitAssemblerFlag(AssemblerFlag Flag) { void MCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCValue &Value, bool MakeAbsolute) { - assert(!Symbol->getSection() && "Cannot assign to a label!"); + // Only absolute symbols can be redefined. + assert((Symbol->isUndefined() || Symbol->isAbsolute()) && + "Cannot define a symbol twice!"); if (MakeAbsolute) { OS << ".set " << Symbol << ", " << Value << '\n'; diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index 316051ce01d..421faef6aa4 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -91,15 +91,12 @@ void MCMachOStreamer::SwitchSection(const MCSection *Section) { } void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) { - assert(Symbol->getSection() == 0 && "Cannot emit a symbol twice!"); + assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); assert(CurSection && "Cannot emit before setting section!"); - assert(!getContext().GetSymbolValue(Symbol) && - "Cannot emit symbol which was directly assigned to!"); llvm_unreachable("FIXME: Not yet implemented!"); - Symbol->setSection(CurSection); - Symbol->setExternal(false); + Symbol->setSection(*CurSection); } void MCMachOStreamer::EmitAssemblerFlag(AssemblerFlag Flag) { @@ -109,7 +106,9 @@ void MCMachOStreamer::EmitAssemblerFlag(AssemblerFlag Flag) { void MCMachOStreamer::EmitAssignment(MCSymbol *Symbol, const MCValue &Value, bool MakeAbsolute) { - assert(!Symbol->getSection() && "Cannot assign to a label!"); + // Only absolute symbols can be redefined. + assert((Symbol->isUndefined() || Symbol->isAbsolute()) && + "Cannot define a symbol twice!"); llvm_unreachable("FIXME: Not yet implemented!"); } diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp index d032017ca47..3c9894b691b 100644 --- a/lib/MC/MCSymbol.cpp +++ b/lib/MC/MCSymbol.cpp @@ -12,6 +12,10 @@ using namespace llvm; +// Sentinel value for the absolute pseudo section. +const MCSection *MCSymbol::AbsolutePseudoSection = + reinterpret_cast(1); + /// NeedsQuoting - Return true if the string \arg Str needs quoting, i.e., it /// does not match [a-zA-Z_.][a-zA-Z0-9_.]*. // diff --git a/test/Scripts/macho-dump b/test/Scripts/macho-dump index e844197a6cc..d37b545b2ab 100755 --- a/test/Scripts/macho-dump +++ b/test/Scripts/macho-dump @@ -2,32 +2,56 @@ import struct import sys +import StringIO class Reader: def __init__(self, path): if path == '-': - self.file = sys.stdin + # Snarf all the data so we can seek. + self.file = StringIO.StringIO(sys.stdin.read()) else: self.file = open(path,'rb') self.isLSB = None - self.pos = 0 + + self.string_table = None def setLSB(self, isLSB): self.isLSB = bool(isLSB) def tell(self): - return self.pos + return self.file.tell() + + def seek(self, pos): + self.file.seek(pos) def read(self, N): data = self.file.read(N) - self.pos += len(data) if len(data) != N: raise ValueError,"Out of data!" return data + def read8(self): + return ord(self.read(1)) + + def read16(self): + return struct.unpack('><'[self.isLSB] + 'H', self.read(2))[0] + def read32(self): return struct.unpack('><'[self.isLSB] + 'I', self.read(4))[0] + def registerStringTable(self, strings): + if self.string_table is not None: + raise ValueError,"%s: warning: multiple string tables" % sys.argv[0] + + self.string_table = strings + + def getString(self, index): + if self.string_table is None: + raise ValueError,"%s: warning: no string table registered" % sys.argv[0] + + end = self.string_table.index('\x00', index) + return self.string_table[index:end] + def dumpmacho(path, opts): f = Reader(path) @@ -60,7 +84,7 @@ def dumpmacho(path, opts): print "])" if f.tell() - start != loadCommandsSize: - raise ValueError,"%s: warning: invalid load commands size: %r" % (sys.argv, loadCommandsSize) + raise ValueError,"%s: warning: invalid load commands size: %r" % (sys.argv[0], loadCommandsSize) def dumpLoadCommand(f, i, opts): start = f.tell() @@ -83,7 +107,7 @@ def dumpLoadCommand(f, i, opts): print " )," if f.tell() - start != cmdSize: - raise ValueError,"%s: warning: invalid load command size: %r" % (sys.argv, cmdSize) + raise ValueError,"%s: warning: invalid load command size: %r" % (sys.argv[0], cmdSize) def dumpSegmentLoadCommand32(f, opts): print " ('segment_name', %r)" % f.read(16) @@ -103,10 +127,45 @@ def dumpSegmentLoadCommand32(f, opts): print " ])" def dumpSymtabCommand(f, opts): - print " ('symoff', %r)" % f.read32() - print " ('nsyms', %r)" % f.read32() - print " ('stroff', %r)" % f.read32() - print " ('strsize', %r)" % f.read32() + symoff = f.read32() + print " ('symoff', %r)" % symoff + nsyms = f.read32() + print " ('nsyms', %r)" % nsyms + stroff = f.read32() + print " ('stroff', %r)" % stroff + strsize = f.read32() + print " ('strsize', %r)" % strsize + + prev_pos = f.tell() + + f.seek(stroff) + string_data = f.read(strsize) + print " ('_string_data', %r)" % string_data + + f.registerStringTable(string_data) + + f.seek(symoff) + print " ('_symbols', [" + for i in range(nsyms): + dumpNlist32(f, i, opts) + print " ])" + + f.seek(prev_pos) + +def dumpNlist32(f, i, opts): + print " # Symbol %r" % i + n_strx = f.read32() + print " (('n_strx', %r)" % n_strx + n_type = f.read8() + print " ('n_type', %#x)" % n_type + n_sect = f.read8() + print " ('n_type', %r)" % n_sect + n_desc = f.read16() + print " ('n_desc', %r)" % n_desc + n_value = f.read32() + print " ('n_value', %r)" % n_value + print " ('_string', %r)" % f.getString(n_strx) + print " )," def dumpDysymtabCommand(f, opts): print " ('ilocalsym', %r)" % f.read32() @@ -121,13 +180,26 @@ def dumpDysymtabCommand(f, opts): print " ('nmodtab', %r)" % f.read32() print " ('extrefsymoff', %r)" % f.read32() print " ('nextrefsyms', %r)" % f.read32() - print " ('indirectsymoff', %r)" % f.read32() - print " ('nindirectsyms', %r)" % f.read32() + indirectsymoff = f.read32() + print " ('indirectsymoff', %r)" % indirectsymoff + nindirectsyms = f.read32() + print " ('nindirectsyms', %r)" % nindirectsyms print " ('extreloff', %r)" % f.read32() print " ('nextrel', %r)" % f.read32() print " ('locreloff', %r)" % f.read32() print " ('nlocrel', %r)" % f.read32() + prev_pos = f.tell() + + f.seek(indirectsymoff) + print " ('_indirect_symbols', [" + for i in range(nindirectsyms): + print " # Indirect Symbol %r" % i + print " (('symbol_index', %r),)," % f.read32() + print " ])" + + f.seek(prev_pos) + def dumpSection32(f, i, opts): print " # Section %r" % i print " (('section_name', %r)" % f.read(16) diff --git a/tools/llvm-mc/AsmParser.cpp b/tools/llvm-mc/AsmParser.cpp index 19781212976..d92d514f8a4 100644 --- a/tools/llvm-mc/AsmParser.cpp +++ b/tools/llvm-mc/AsmParser.cpp @@ -149,10 +149,6 @@ bool AsmParser::ParsePrimaryExpr(AsmExpr *&Res) { // This is a label, this should be parsed as part of an expression, to // handle things like LFOO+4. MCSymbol *Sym = Ctx.GetOrCreateSymbol(Lexer.getTok().getIdentifier()); - - // If this is use of an undefined symbol then mark it external. - if (!Sym->getSection() && !Ctx.GetSymbolValue(Sym)) - Sym->setExternal(true); Res = new AsmSymbolRefExpr(Sym); Lexer.Lex(); // Eat identifier. @@ -376,10 +372,8 @@ bool AsmParser::ParseStatement() { // FIXME: This doesn't diagnose assignment to a symbol which has been // implicitly marked as external. MCSymbol *Sym = Ctx.GetOrCreateSymbol(IDVal); - if (Sym->getSection()) + if (!Sym->isUndefined()) return Error(IDLoc, "invalid symbol redefinition"); - if (Ctx.GetSymbolValue(Sym)) - return Error(IDLoc, "symbol already used as assembler variable"); // Since we saw a label, create a symbol and emit it. // FIXME: If the label starts with L it is an assembler temporary label. @@ -687,15 +681,11 @@ bool AsmParser::ParseAssignment(const StringRef &Name, bool IsDotSet) { // Diagnose assignment to a label. // // FIXME: Diagnostics. Note the location of the definition as a label. - // FIXME: This doesn't diagnose assignment to a symbol which has been - // implicitly marked as external. // FIXME: Handle '.'. // FIXME: Diagnose assignment to protected identifier (e.g., register name). MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name); - if (Sym->getSection()) - return Error(EqualLoc, "invalid assignment to symbol emitted as a label"); - if (Sym->isExternal()) - return Error(EqualLoc, "invalid assignment to external symbol"); + if (!Sym->isUndefined() && !Sym->isAbsolute()) + return Error(EqualLoc, "symbol has already been defined"); // Do the assignment. Out.EmitAssignment(Sym, Value, IsDotSet); @@ -1117,10 +1107,6 @@ bool AsmParser::ParseDirectiveSymbolAttribute(MCStreamer::SymbolAttr Attr) { MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name); - // If this is use of an undefined symbol then mark it external. - if (!Sym->getSection() && !Ctx.GetSymbolValue(Sym)) - Sym->setExternal(true); - Out.EmitSymbolAttribute(Sym, Attr); if (Lexer.is(AsmToken::EndOfStatement)) @@ -1213,8 +1199,7 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) { return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive " "alignment, can't be less than zero"); - // TODO: Symbol must be undefined or it is a error to re-defined the symbol - if (Sym->getSection() || Ctx.GetSymbolValue(Sym)) + if (!Sym->isUndefined()) return Error(IDLoc, "invalid symbol redefinition"); // Create the Symbol as a common or local common with Size and Pow2Alignment @@ -1305,8 +1290,7 @@ bool AsmParser::ParseDirectiveDarwinZerofill() { return Error(Pow2AlignmentLoc, "invalid '.zerofill' directive alignment, " "can't be less than zero"); - // TODO: Symbol must be undefined or it is a error to re-defined the symbol - if (Sym->getSection() || Ctx.GetSymbolValue(Sym)) + if (!Sym->isUndefined()) return Error(IDLoc, "invalid symbol redefinition"); // FIXME: Arch specific.