diff --git a/include/llvm/IR/UseListOrder.h b/include/llvm/IR/UseListOrder.h index aaf6b8882ad..ebc08cbd94e 100644 --- a/include/llvm/IR/UseListOrder.h +++ b/include/llvm/IR/UseListOrder.h @@ -15,28 +15,21 @@ #ifndef LLVM_IR_USELISTORDER_H #define LLVM_IR_USELISTORDER_H +#include "llvm/ADT/ArrayRef.h" + namespace llvm { class Module; /// \brief Whether to preserve use-list ordering. bool shouldPreserveBitcodeUseListOrder(); +bool shouldPreserveAssemblyUseListOrder(); /// \brief Shuffle all use-lists in a module. /// /// Adds \c SeedOffset to the default seed for the random number generator. void shuffleUseLists(Module &M, unsigned SeedOffset = 0); -/// \brief Verify use-list order after serializing to bitcode. -/// -/// \return \c true if there are no errors. -bool verifyBitcodeUseListOrder(const Module &M); - -/// \brief Verify use-list order after serializing to assembly. -/// -/// \return \c true if there are no errors. -bool verifyAssemblyUseListOrder(const Module &M); - } // end namespace llvm #endif diff --git a/lib/IR/UseListOrder.cpp b/lib/IR/UseListOrder.cpp index c1d3561a640..dda7f25cf2f 100644 --- a/lib/IR/UseListOrder.cpp +++ b/lib/IR/UseListOrder.cpp @@ -14,18 +14,10 @@ #include "llvm/IR/UseListOrder.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/AsmParser/Parser.h" -#include "llvm/Bitcode/ReaderWriter.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/FileUtilities.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/SourceMgr.h" #include #include @@ -39,10 +31,19 @@ static cl::opt PreserveBitcodeUseListOrder( cl::desc("Experimental support to preserve bitcode use-list order."), cl::init(false), cl::Hidden); +static cl::opt PreserveAssemblyUseListOrder( + "preserve-ll-use-list-order", + cl::desc("Experimental support to preserve assembly use-list order."), + cl::init(false), cl::Hidden); + bool llvm::shouldPreserveBitcodeUseListOrder() { return PreserveBitcodeUseListOrder; } +bool llvm::shouldPreserveAssemblyUseListOrder() { + return PreserveAssemblyUseListOrder; +} + static void shuffleValueUseLists(Value *V, std::minstd_rand0 &Gen, DenseSet &Seen) { if (!Seen.insert(V).second) @@ -127,298 +128,3 @@ void llvm::shuffleUseLists(Module &M, unsigned SeedOffset) { DEBUG(dbgs() << "\n"); } - -namespace { - -struct TempFile { - std::string Filename; - FileRemover Remover; - bool init(const std::string &Ext); - bool writeBitcode(const Module &M) const; - bool writeAssembly(const Module &M) const; - std::unique_ptr readBitcode(LLVMContext &Context) const; - std::unique_ptr readAssembly(LLVMContext &Context) const; -}; - -struct ValueMapping { - DenseMap IDs; - std::vector Values; - - /// \brief Construct a value mapping for module. - /// - /// Creates mapping from every value in \c M to an ID. This mapping includes - /// un-referencable values. - /// - /// Every \a Value that gets serialized in some way should be represented - /// here. The order needs to be deterministic, but it's unnecessary to match - /// the value-ids in the bitcode writer. - /// - /// All constants that are referenced by other values are included in the - /// mapping, but others -- which wouldn't be serialized -- are not. - ValueMapping(const Module &M); - - /// \brief Map a value. - /// - /// Maps a value. If it's a constant, maps all of its operands first. - void map(const Value *V); - unsigned lookup(const Value *V) const { return IDs.lookup(V); } -}; - -} // end namespace - -bool TempFile::init(const std::string &Ext) { - SmallVector Vector; - DEBUG(dbgs() << " - create-temp-file\n"); - if (auto EC = sys::fs::createTemporaryFile("use-list-order", Ext, Vector)) { - (void)EC; - DEBUG(dbgs() << "error: " << EC.message() << "\n"); - return true; - } - assert(!Vector.empty()); - - Filename.assign(Vector.data(), Vector.data() + Vector.size()); - Remover.setFile(Filename); - DEBUG(dbgs() << " - filename = " << Filename << "\n"); - return false; -} - -bool TempFile::writeBitcode(const Module &M) const { - DEBUG(dbgs() << " - write bitcode\n"); - std::string ErrorInfo; - raw_fd_ostream OS(Filename.c_str(), ErrorInfo, sys::fs::F_None); - if (!ErrorInfo.empty()) { - DEBUG(dbgs() << "error: " << ErrorInfo << "\n"); - return true; - } - - WriteBitcodeToFile(&M, OS); - return false; -} - -bool TempFile::writeAssembly(const Module &M) const { - DEBUG(dbgs() << " - write assembly\n"); - std::string ErrorInfo; - raw_fd_ostream OS(Filename.c_str(), ErrorInfo, sys::fs::F_Text); - if (!ErrorInfo.empty()) { - DEBUG(dbgs() << "error: " << ErrorInfo << "\n"); - return true; - } - - OS << M; - return false; -} - -std::unique_ptr TempFile::readBitcode(LLVMContext &Context) const { - DEBUG(dbgs() << " - read bitcode\n"); - ErrorOr> BufferOr = - MemoryBuffer::getFile(Filename); - if (!BufferOr) { - DEBUG(dbgs() << "error: " << BufferOr.getError().message() << "\n"); - return nullptr; - } - - std::unique_ptr Buffer = std::move(BufferOr.get()); - ErrorOr ModuleOr = parseBitcodeFile(Buffer.release(), Context); - if (!ModuleOr) { - DEBUG(dbgs() << "error: " << ModuleOr.getError().message() << "\n"); - return nullptr; - } - return std::unique_ptr(ModuleOr.get()); -} - -std::unique_ptr TempFile::readAssembly(LLVMContext &Context) const { - DEBUG(dbgs() << " - read assembly\n"); - SMDiagnostic Err; - std::unique_ptr M(ParseAssemblyFile(Filename, Err, Context)); - if (!M.get()) - DEBUG(dbgs() << "error: "; Err.print("verify-use-list-order", dbgs())); - return M; -} - -ValueMapping::ValueMapping(const Module &M) { - // Every value should be mapped, including things like void instructions and - // basic blocks that are kept out of the ValueEnumerator. - // - // The current mapping order makes it easier to debug the tables. It happens - // to be similar to the ID mapping when writing ValueEnumerator, but they - // aren't (and needn't be) in sync. - - // Globals. - for (const GlobalVariable &G : M.globals()) - map(&G); - for (const GlobalAlias &A : M.aliases()) - map(&A); - for (const Function &F : M) - map(&F); - - // Constants used by globals. - for (const GlobalVariable &G : M.globals()) - if (G.hasInitializer()) - map(G.getInitializer()); - for (const GlobalAlias &A : M.aliases()) - map(A.getAliasee()); - for (const Function &F : M) - if (F.hasPrefixData()) - map(F.getPrefixData()); - - // Function bodies. - for (const Function &F : M) { - for (const Argument &A : F.args()) - map(&A); - for (const BasicBlock &BB : F) - map(&BB); - for (const BasicBlock &BB : F) - for (const Instruction &I : BB) - map(&I); - - // Constants used by instructions. - for (const BasicBlock &BB : F) - for (const Instruction &I : BB) - for (const Value *Op : I.operands()) - if ((isa(Op) && !isa(*Op)) || - isa(Op)) - map(Op); - } -} - -void ValueMapping::map(const Value *V) { - if (IDs.lookup(V)) - return; - - if (auto *C = dyn_cast(V)) - if (!isa(C)) - for (const Value *Op : C->operands()) - map(Op); - - Values.push_back(V); - IDs[V] = Values.size(); -} - -#ifndef NDEBUG -static void dumpMapping(const ValueMapping &VM) { - dbgs() << "value-mapping (size = " << VM.Values.size() << "):\n"; - for (unsigned I = 0, E = VM.Values.size(); I != E; ++I) { - dbgs() << " - id = " << I << ", value = "; - VM.Values[I]->dump(); - } -} - -static void debugValue(const ValueMapping &M, unsigned I, StringRef Desc) { - const Value *V = M.Values[I]; - dbgs() << " - " << Desc << " value = "; - V->dump(); - for (const Use &U : V->uses()) { - dbgs() << " => use: op = " << U.getOperandNo() - << ", user-id = " << M.IDs.lookup(U.getUser()) << ", user = "; - U.getUser()->dump(); - } -} - -static void debugUserMismatch(const ValueMapping &L, const ValueMapping &R, - unsigned I) { - dbgs() << " - fail: user mismatch: ID = " << I << "\n"; - debugValue(L, I, "LHS"); - debugValue(R, I, "RHS"); - - dbgs() << "\nlhs-"; - dumpMapping(L); - dbgs() << "\nrhs-"; - dumpMapping(R); -} - -static void debugSizeMismatch(const ValueMapping &L, const ValueMapping &R) { - dbgs() << " - fail: map size: " << L.Values.size() - << " != " << R.Values.size() << "\n"; - dbgs() << "\nlhs-"; - dumpMapping(L); - dbgs() << "\nrhs-"; - dumpMapping(R); -} -#endif - -static bool matches(const ValueMapping &LM, const ValueMapping &RM) { - DEBUG(dbgs() << "compare value maps\n"); - if (LM.Values.size() != RM.Values.size()) { - DEBUG(debugSizeMismatch(LM, RM)); - return false; - } - - // This mapping doesn't include dangling constant users, since those don't - // get serialized. However, checking if users are constant and calling - // isConstantUsed() on every one is very expensive. Instead, just check if - // the user is mapped. - auto skipUnmappedUsers = - [&](Value::const_use_iterator &U, Value::const_use_iterator E, - const ValueMapping &M) { - while (U != E && !M.lookup(U->getUser())) - ++U; - }; - - // Iterate through all values, and check that both mappings have the same - // users. - for (unsigned I = 0, E = LM.Values.size(); I != E; ++I) { - const Value *L = LM.Values[I]; - const Value *R = RM.Values[I]; - auto LU = L->use_begin(), LE = L->use_end(); - auto RU = R->use_begin(), RE = R->use_end(); - skipUnmappedUsers(LU, LE, LM); - skipUnmappedUsers(RU, RE, RM); - - while (LU != LE) { - if (RU == RE) { - DEBUG(debugUserMismatch(LM, RM, I)); - return false; - } - if (LM.lookup(LU->getUser()) != RM.lookup(RU->getUser())) { - DEBUG(debugUserMismatch(LM, RM, I)); - return false; - } - if (LU->getOperandNo() != RU->getOperandNo()) { - DEBUG(debugUserMismatch(LM, RM, I)); - return false; - } - skipUnmappedUsers(++LU, LE, LM); - skipUnmappedUsers(++RU, RE, RM); - } - if (RU != RE) { - DEBUG(debugUserMismatch(LM, RM, I)); - return false; - } - } - - return true; -} - -bool llvm::verifyBitcodeUseListOrder(const Module &M) { - DEBUG(dbgs() << "*** verify-use-list-order: bitcode ***\n"); - TempFile F; - if (F.init("bc")) - return false; - - if (F.writeBitcode(M)) - return false; - - LLVMContext Context; - std::unique_ptr OtherM = F.readBitcode(Context); - if (!OtherM) - return false; - - return matches(ValueMapping(M), ValueMapping(*OtherM)); -} - -bool llvm::verifyAssemblyUseListOrder(const Module &M) { - DEBUG(dbgs() << "*** verify-use-list-order: assembly ***\n"); - TempFile F; - if (F.init("ll")) - return false; - - if (F.writeAssembly(M)) - return false; - - LLVMContext Context; - std::unique_ptr OtherM = F.readAssembly(Context); - if (!OtherM) - return false; - - return matches(ValueMapping(M), ValueMapping(*OtherM)); -} diff --git a/lib/Transforms/IPO/VerifyUseListOrder.cpp b/lib/Transforms/IPO/VerifyUseListOrder.cpp index a48f6c78202..16a88e3d9a1 100644 --- a/lib/Transforms/IPO/VerifyUseListOrder.cpp +++ b/lib/Transforms/IPO/VerifyUseListOrder.cpp @@ -15,15 +15,320 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO.h" + +#include "llvm/ADT/DenseMap.h" +#include "llvm/AsmParser/Parser.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/IR/UseListOrder.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" using namespace llvm; #define DEBUG_TYPE "use-list-order" +namespace { + +struct TempFile { + std::string Filename; + FileRemover Remover; + bool init(const std::string &Ext); + bool writeBitcode(const Module &M) const; + bool writeAssembly(const Module &M) const; + std::unique_ptr readBitcode(LLVMContext &Context) const; + std::unique_ptr readAssembly(LLVMContext &Context) const; +}; + +struct ValueMapping { + DenseMap IDs; + std::vector Values; + + /// \brief Construct a value mapping for module. + /// + /// Creates mapping from every value in \c M to an ID. This mapping includes + /// un-referencable values. + /// + /// Every \a Value that gets serialized in some way should be represented + /// here. The order needs to be deterministic, but it's unnecessary to match + /// the value-ids in the bitcode writer. + /// + /// All constants that are referenced by other values are included in the + /// mapping, but others -- which wouldn't be serialized -- are not. + ValueMapping(const Module &M); + + /// \brief Map a value. + /// + /// Maps a value. If it's a constant, maps all of its operands first. + void map(const Value *V); + unsigned lookup(const Value *V) const { return IDs.lookup(V); } +}; + +} // end namespace + +bool TempFile::init(const std::string &Ext) { + SmallVector Vector; + DEBUG(dbgs() << " - create-temp-file\n"); + if (auto EC = sys::fs::createTemporaryFile("use-list-order", Ext, Vector)) { + (void)EC; + DEBUG(dbgs() << "error: " << EC.message() << "\n"); + return true; + } + assert(!Vector.empty()); + + Filename.assign(Vector.data(), Vector.data() + Vector.size()); + Remover.setFile(Filename); + DEBUG(dbgs() << " - filename = " << Filename << "\n"); + return false; +} + +bool TempFile::writeBitcode(const Module &M) const { + DEBUG(dbgs() << " - write bitcode\n"); + std::string ErrorInfo; + raw_fd_ostream OS(Filename.c_str(), ErrorInfo, sys::fs::F_None); + if (!ErrorInfo.empty()) { + DEBUG(dbgs() << "error: " << ErrorInfo << "\n"); + return true; + } + + WriteBitcodeToFile(&M, OS); + return false; +} + +bool TempFile::writeAssembly(const Module &M) const { + DEBUG(dbgs() << " - write assembly\n"); + std::string ErrorInfo; + raw_fd_ostream OS(Filename.c_str(), ErrorInfo, sys::fs::F_Text); + if (!ErrorInfo.empty()) { + DEBUG(dbgs() << "error: " << ErrorInfo << "\n"); + return true; + } + + OS << M; + return false; +} + +std::unique_ptr TempFile::readBitcode(LLVMContext &Context) const { + DEBUG(dbgs() << " - read bitcode\n"); + ErrorOr> BufferOr = + MemoryBuffer::getFile(Filename); + if (!BufferOr) { + DEBUG(dbgs() << "error: " << BufferOr.getError().message() << "\n"); + return nullptr; + } + + std::unique_ptr Buffer = std::move(BufferOr.get()); + ErrorOr ModuleOr = parseBitcodeFile(Buffer.release(), Context); + if (!ModuleOr) { + DEBUG(dbgs() << "error: " << ModuleOr.getError().message() << "\n"); + return nullptr; + } + return std::unique_ptr(ModuleOr.get()); +} + +std::unique_ptr TempFile::readAssembly(LLVMContext &Context) const { + DEBUG(dbgs() << " - read assembly\n"); + SMDiagnostic Err; + std::unique_ptr M(ParseAssemblyFile(Filename, Err, Context)); + if (!M.get()) + DEBUG(dbgs() << "error: "; Err.print("verify-use-list-order", dbgs())); + return M; +} + +ValueMapping::ValueMapping(const Module &M) { + // Every value should be mapped, including things like void instructions and + // basic blocks that are kept out of the ValueEnumerator. + // + // The current mapping order makes it easier to debug the tables. It happens + // to be similar to the ID mapping when writing ValueEnumerator, but they + // aren't (and needn't be) in sync. + + // Globals. + for (const GlobalVariable &G : M.globals()) + map(&G); + for (const GlobalAlias &A : M.aliases()) + map(&A); + for (const Function &F : M) + map(&F); + + // Constants used by globals. + for (const GlobalVariable &G : M.globals()) + if (G.hasInitializer()) + map(G.getInitializer()); + for (const GlobalAlias &A : M.aliases()) + map(A.getAliasee()); + for (const Function &F : M) + if (F.hasPrefixData()) + map(F.getPrefixData()); + + // Function bodies. + for (const Function &F : M) { + for (const Argument &A : F.args()) + map(&A); + for (const BasicBlock &BB : F) + map(&BB); + for (const BasicBlock &BB : F) + for (const Instruction &I : BB) + map(&I); + + // Constants used by instructions. + for (const BasicBlock &BB : F) + for (const Instruction &I : BB) + for (const Value *Op : I.operands()) + if ((isa(Op) && !isa(*Op)) || + isa(Op)) + map(Op); + } +} + +void ValueMapping::map(const Value *V) { + if (IDs.lookup(V)) + return; + + if (auto *C = dyn_cast(V)) + if (!isa(C)) + for (const Value *Op : C->operands()) + map(Op); + + Values.push_back(V); + IDs[V] = Values.size(); +} + +#ifndef NDEBUG +static void dumpMapping(const ValueMapping &VM) { + dbgs() << "value-mapping (size = " << VM.Values.size() << "):\n"; + for (unsigned I = 0, E = VM.Values.size(); I != E; ++I) { + dbgs() << " - id = " << I << ", value = "; + VM.Values[I]->dump(); + } +} + +static void debugValue(const ValueMapping &M, unsigned I, StringRef Desc) { + const Value *V = M.Values[I]; + dbgs() << " - " << Desc << " value = "; + V->dump(); + for (const Use &U : V->uses()) { + dbgs() << " => use: op = " << U.getOperandNo() + << ", user-id = " << M.IDs.lookup(U.getUser()) << ", user = "; + U.getUser()->dump(); + } +} + +static void debugUserMismatch(const ValueMapping &L, const ValueMapping &R, + unsigned I) { + dbgs() << " - fail: user mismatch: ID = " << I << "\n"; + debugValue(L, I, "LHS"); + debugValue(R, I, "RHS"); + + dbgs() << "\nlhs-"; + dumpMapping(L); + dbgs() << "\nrhs-"; + dumpMapping(R); +} + +static void debugSizeMismatch(const ValueMapping &L, const ValueMapping &R) { + dbgs() << " - fail: map size: " << L.Values.size() + << " != " << R.Values.size() << "\n"; + dbgs() << "\nlhs-"; + dumpMapping(L); + dbgs() << "\nrhs-"; + dumpMapping(R); +} +#endif + +static bool matches(const ValueMapping &LM, const ValueMapping &RM) { + DEBUG(dbgs() << "compare value maps\n"); + if (LM.Values.size() != RM.Values.size()) { + DEBUG(debugSizeMismatch(LM, RM)); + return false; + } + + // This mapping doesn't include dangling constant users, since those don't + // get serialized. However, checking if users are constant and calling + // isConstantUsed() on every one is very expensive. Instead, just check if + // the user is mapped. + auto skipUnmappedUsers = + [&](Value::const_use_iterator &U, Value::const_use_iterator E, + const ValueMapping &M) { + while (U != E && !M.lookup(U->getUser())) + ++U; + }; + + // Iterate through all values, and check that both mappings have the same + // users. + for (unsigned I = 0, E = LM.Values.size(); I != E; ++I) { + const Value *L = LM.Values[I]; + const Value *R = RM.Values[I]; + auto LU = L->use_begin(), LE = L->use_end(); + auto RU = R->use_begin(), RE = R->use_end(); + skipUnmappedUsers(LU, LE, LM); + skipUnmappedUsers(RU, RE, RM); + + while (LU != LE) { + if (RU == RE) { + DEBUG(debugUserMismatch(LM, RM, I)); + return false; + } + if (LM.lookup(LU->getUser()) != RM.lookup(RU->getUser())) { + DEBUG(debugUserMismatch(LM, RM, I)); + return false; + } + if (LU->getOperandNo() != RU->getOperandNo()) { + DEBUG(debugUserMismatch(LM, RM, I)); + return false; + } + skipUnmappedUsers(++LU, LE, LM); + skipUnmappedUsers(++RU, RE, RM); + } + if (RU != RE) { + DEBUG(debugUserMismatch(LM, RM, I)); + return false; + } + } + + return true; +} + +static bool verifyBitcodeUseListOrder(const Module &M) { + DEBUG(dbgs() << "*** verify-use-list-order: bitcode ***\n"); + TempFile F; + if (F.init("bc")) + return false; + + if (F.writeBitcode(M)) + return false; + + LLVMContext Context; + std::unique_ptr OtherM = F.readBitcode(Context); + if (!OtherM) + return false; + + return matches(ValueMapping(M), ValueMapping(*OtherM)); +} + +static bool verifyAssemblyUseListOrder(const Module &M) { + DEBUG(dbgs() << "*** verify-use-list-order: assembly ***\n"); + TempFile F; + if (F.init("ll")) + return false; + + if (F.writeAssembly(M)) + return false; + + LLVMContext Context; + std::unique_ptr OtherM = F.readAssembly(Context); + if (!OtherM) + return false; + + return matches(ValueMapping(M), ValueMapping(*OtherM)); +} + namespace { class VerifyUseListOrder : public ModulePass { public: @@ -53,6 +358,10 @@ bool VerifyUseListOrder::runOnModule(Module &M) { if (!verifyBitcodeUseListOrder(M)) report_fatal_error("bitcode use-list order changed"); + if (shouldPreserveBitcodeUseListOrder()) + if (!verifyAssemblyUseListOrder(M)) + report_fatal_error("assembly use-list order changed"); + return true; }