From 1254b69520b9b9492dd850aa87908af80ed8738f Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 9 Jul 2019 00:34:08 +0000 Subject: [PATCH] Let unaliased Args track which Alias they were created from, and use that in Arg::getAsString() for diagnostics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With this, `clang-cl /source-charset:utf-16 test.cc` now prints `invalid value 'utf-16' in '/source-charset:utf-16'` instead of `invalid value 'utf-16' in '-finput-charset=utf-16'` before, and several other clang-cl flags produce much less confusing output as well. Fixes PR29106. Since an arg and its alias can have different arg types (joined vs not) and different values (because of AliasArgs<>), I chose to give the Alias its own Arg object. For convenience, I just store the alias directly in the unaliased arg – there aren't many arg objects at runtime, so that seems ok. Finally, I changed Arg::getAsString() to use the alias's representation if it's present – that function was already documented as being the suitable function for diagnostics, and most callers already used it for diagnostics. Implementation-wise, Arg::accept() previously used to parse things as the unaliased option. The core of that switch is now extracted into a new function acceptInternal() which parses as the _aliased_ option, and the previously-intermingled unaliasing is now done as an explicit step afterwards. (This also changes one place in lld that didn't use getAsString() for diagnostics, so that that one place now also prints the flag as the user wrote it, not as it looks after it went through unaliasing.) Differential Revision: https://reviews.llvm.org/D64253 llvm-svn: 365413 --- include/llvm/Option/Arg.h | 16 ++++- include/llvm/Option/Option.h | 5 ++ lib/Option/Arg.cpp | 3 + lib/Option/Option.cpp | 115 ++++++++++++++++++++++------------- 4 files changed, 95 insertions(+), 44 deletions(-) diff --git a/include/llvm/Option/Arg.h b/include/llvm/Option/Arg.h index 8c4787051cf..22e2bcf06a6 100644 --- a/include/llvm/Option/Arg.h +++ b/include/llvm/Option/Arg.h @@ -58,6 +58,11 @@ private: /// The argument values, as C strings. SmallVector Values; + /// If this arg was created through an alias, this is the original alias arg. + /// For example, *this might be "-finput-charset=utf-8" and Alias might + /// point to an arg representing "/source-charset:utf-8". + std::unique_ptr Alias; + public: Arg(const Option Opt, StringRef Spelling, unsigned Index, const Arg *BaseArg = nullptr); @@ -90,6 +95,11 @@ public: } void setBaseArg(const Arg *BaseArg) { this->BaseArg = BaseArg; } + /// Args are converted to their unaliased form. For args that originally + /// came from an alias, this returns the alias the arg was produced from. + const Arg* getAlias() const { return Alias.get(); } + void setAlias(std::unique_ptr Alias) { this->Alias = std::move(Alias); } + bool getOwnsValues() const { return OwnsValues; } void setOwnsValues(bool Value) const { OwnsValues = Value; } @@ -127,8 +137,10 @@ public: void print(raw_ostream &O) const; void dump() const; - /// Return a formatted version of the argument and - /// its values, for debugging and diagnostics. + /// Return a formatted version of the argument and its values, for + /// diagnostics. Since this is for diagnostics, if this Arg was produced + /// through an alias, this returns the string representation of the alias + /// that the user wrote. std::string getAsString(const ArgList &Args) const; }; diff --git a/include/llvm/Option/Option.h b/include/llvm/Option/Option.h index c11487e3f9d..33813d28d27 100644 --- a/include/llvm/Option/Option.h +++ b/include/llvm/Option/Option.h @@ -206,6 +206,11 @@ public: /// start. Arg *accept(const ArgList &Args, unsigned &Index, unsigned ArgSize) const; +private: + Arg *acceptInternal(const ArgList &Args, unsigned &Index, + unsigned ArgSize) const; + +public: void print(raw_ostream &O) const; void dump() const; }; diff --git a/lib/Option/Arg.cpp b/lib/Option/Arg.cpp index 420279b0bca..ea382b34734 100644 --- a/lib/Option/Arg.cpp +++ b/lib/Option/Arg.cpp @@ -66,6 +66,9 @@ LLVM_DUMP_METHOD void Arg::dump() const { print(dbgs()); } #endif std::string Arg::getAsString(const ArgList &Args) const { + if (Alias) + return Alias->getAsString(Args); + SmallString<256> Res; raw_svector_ostream OS(Res); diff --git a/lib/Option/Option.cpp b/lib/Option/Option.cpp index a13278ce8e6..9abc9fdce4c 100644 --- a/lib/Option/Option.cpp +++ b/lib/Option/Option.cpp @@ -106,49 +106,23 @@ bool Option::matches(OptSpecifier Opt) const { return false; } -Arg *Option::accept(const ArgList &Args, - unsigned &Index, - unsigned ArgSize) const { - const Option &UnaliasedOption = getUnaliasedOption(); - StringRef Spelling; - // If the option was an alias, get the spelling from the unaliased one. - if (getID() == UnaliasedOption.getID()) { - Spelling = StringRef(Args.getArgString(Index), ArgSize); - } else { - Spelling = Args.MakeArgString(Twine(UnaliasedOption.getPrefix()) + - Twine(UnaliasedOption.getName())); - } - +Arg *Option::acceptInternal(const ArgList &Args, unsigned &Index, + unsigned ArgSize) const { + StringRef Spelling = StringRef(Args.getArgString(Index), ArgSize); switch (getKind()) { case FlagClass: { if (ArgSize != strlen(Args.getArgString(Index))) return nullptr; - - Arg *A = new Arg(UnaliasedOption, Spelling, Index++); - if (getAliasArgs()) { - const char *Val = getAliasArgs(); - while (*Val != '\0') { - A->getValues().push_back(Val); - - // Move past the '\0' to the next argument. - Val += strlen(Val) + 1; - } - } - - if (UnaliasedOption.getKind() == JoinedClass && !getAliasArgs()) - // A Flag alias for a Joined option must provide an argument. - A->getValues().push_back(""); - - return A; + return new Arg(*this, Spelling, Index++); } case JoinedClass: { const char *Value = Args.getArgString(Index) + ArgSize; - return new Arg(UnaliasedOption, Spelling, Index++, Value); + return new Arg(*this, Spelling, Index++, Value); } case CommaJoinedClass: { // Always matches. const char *Str = Args.getArgString(Index) + ArgSize; - Arg *A = new Arg(UnaliasedOption, Spelling, Index++); + Arg *A = new Arg(*this, Spelling, Index++); // Parse out the comma separated values. const char *Prev = Str; @@ -184,8 +158,7 @@ Arg *Option::accept(const ArgList &Args, Args.getArgString(Index - 1) == nullptr) return nullptr; - return new Arg(UnaliasedOption, Spelling, - Index - 2, Args.getArgString(Index - 1)); + return new Arg(*this, Spelling, Index - 2, Args.getArgString(Index - 1)); case MultiArgClass: { // Matches iff this is an exact match. // FIXME: Avoid strlen. @@ -196,8 +169,8 @@ Arg *Option::accept(const ArgList &Args, if (Index > Args.getNumInputArgStrings()) return nullptr; - Arg *A = new Arg(UnaliasedOption, Spelling, Index - 1 - getNumArgs(), - Args.getArgString(Index - getNumArgs())); + Arg *A = new Arg(*this, Spelling, Index - 1 - getNumArgs(), + Args.getArgString(Index - getNumArgs())); for (unsigned i = 1; i != getNumArgs(); ++i) A->getValues().push_back(Args.getArgString(Index - getNumArgs() + i)); return A; @@ -207,7 +180,7 @@ Arg *Option::accept(const ArgList &Args, // FIXME: Avoid strlen. if (ArgSize != strlen(Args.getArgString(Index))) { const char *Value = Args.getArgString(Index) + ArgSize; - return new Arg(UnaliasedOption, Spelling, Index++, Value); + return new Arg(*this, Spelling, Index++, Value); } // Otherwise it must be separate. @@ -216,8 +189,7 @@ Arg *Option::accept(const ArgList &Args, Args.getArgString(Index - 1) == nullptr) return nullptr; - return new Arg(UnaliasedOption, Spelling, - Index - 2, Args.getArgString(Index - 1)); + return new Arg(*this, Spelling, Index - 2, Args.getArgString(Index - 1)); } case JoinedAndSeparateClass: // Always matches. @@ -226,7 +198,7 @@ Arg *Option::accept(const ArgList &Args, Args.getArgString(Index - 1) == nullptr) return nullptr; - return new Arg(UnaliasedOption, Spelling, Index - 2, + return new Arg(*this, Spelling, Index - 2, Args.getArgString(Index - 2) + ArgSize, Args.getArgString(Index - 1)); case RemainingArgsClass: { @@ -234,14 +206,14 @@ Arg *Option::accept(const ArgList &Args, // FIXME: Avoid strlen. if (ArgSize != strlen(Args.getArgString(Index))) return nullptr; - Arg *A = new Arg(UnaliasedOption, Spelling, Index++); + Arg *A = new Arg(*this, Spelling, Index++); while (Index < Args.getNumInputArgStrings() && Args.getArgString(Index) != nullptr) A->getValues().push_back(Args.getArgString(Index++)); return A; } case RemainingArgsJoinedClass: { - Arg *A = new Arg(UnaliasedOption, Spelling, Index); + Arg *A = new Arg(*this, Spelling, Index); if (ArgSize != strlen(Args.getArgString(Index))) { // An inexact match means there is a joined arg. A->getValues().push_back(Args.getArgString(Index) + ArgSize); @@ -257,3 +229,62 @@ Arg *Option::accept(const ArgList &Args, llvm_unreachable("Invalid option kind!"); } } + +Arg *Option::accept(const ArgList &Args, + unsigned &Index, + unsigned ArgSize) const { + std::unique_ptr A(acceptInternal(Args, Index, ArgSize)); + if (!A) + return nullptr; + + const Option &UnaliasedOption = getUnaliasedOption(); + if (getID() == UnaliasedOption.getID()) + return A.release(); + + // "A" is an alias for a different flag. For most clients it's more convenient + // if this function returns unaliased Args, so create an unaliased arg for + // returning. + + // This creates a completely new Arg object for the unaliased Arg because + // the alias and the unaliased arg can have different Kinds and different + // Values (due to AliasArgs<>). + + // Get the spelling from the unaliased option. + StringRef UnaliasedSpelling = Args.MakeArgString( + Twine(UnaliasedOption.getPrefix()) + Twine(UnaliasedOption.getName())); + + // It's a bit weird that aliased and unaliased arg share one index, but + // the index is mostly use as a memory optimization in render(). + // Due to this, ArgList::getArgString(A->getIndex()) will return the spelling + // of the aliased arg always, while A->getSpelling() returns either the + // unaliased or the aliased arg, depending on which Arg object it's called on. + Arg *UnaliasedA = new Arg(UnaliasedOption, UnaliasedSpelling, A->getIndex()); + Arg *RawA = A.get(); + UnaliasedA->setAlias(std::move(A)); + + if (getKind() != FlagClass) { + // Values are usually owned by the ArgList. The exception are + // CommaJoined flags, where the Arg owns the values. For aliased flags, + // make the unaliased Arg the owner of the values. + // FIXME: There aren't many uses of CommaJoined -- try removing + // CommaJoined in favor of just calling StringRef::split(',') instead. + UnaliasedA->getValues() = RawA->getValues(); + UnaliasedA->setOwnsValues(RawA->getOwnsValues()); + RawA->setOwnsValues(false); + return UnaliasedA; + } + + // FlagClass aliases can have AliasArgs<>; add those to the unaliased arg. + if (const char *Val = getAliasArgs()) { + while (*Val != '\0') { + UnaliasedA->getValues().push_back(Val); + + // Move past the '\0' to the next argument. + Val += strlen(Val) + 1; + } + } + if (UnaliasedOption.getKind() == JoinedClass && !getAliasArgs()) + // A Flag alias for a Joined option must provide an argument. + UnaliasedA->getValues().push_back(""); + return UnaliasedA; +}