1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-21 20:12:56 +02:00
llvm-mirror/lib/Transforms/IPO/WholeProgramDevirt.cpp
Serge Guelton 6ca59da244 Module::getOrInsertFunction is using C-style vararg instead of variadic templates.
From a user prospective, it forces the use of an annoying nullptr to mark the end of the vararg, and there's not type checking on the arguments.
The variadic template is an obvious solution to both issues.

Differential Revision: https://reviews.llvm.org/D31070

llvm-svn: 299949
2017-04-11 15:01:18 +00:00

1425 lines
53 KiB
C++

//===- WholeProgramDevirt.cpp - Whole program virtual call optimization ---===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass implements whole program optimization of virtual calls in cases
// where we know (via !type metadata) that the list of callees is fixed. This
// includes the following:
// - Single implementation devirtualization: if a virtual call has a single
// possible callee, replace all calls with a direct call to that callee.
// - Virtual constant propagation: if the virtual function's return type is an
// integer <=64 bits and all possible callees are readnone, for each class and
// each list of constant arguments: evaluate the function, store the return
// value alongside the virtual table, and rewrite each virtual call as a load
// from the virtual table.
// - Uniform return value optimization: if the conditions for virtual constant
// propagation hold and each function returns the same constant value, replace
// each virtual call with that constant.
// - Unique return value optimization for i1 return values: if the conditions
// for virtual constant propagation hold and a single vtable's function
// returns 0, or a single vtable's function returns 1, replace each virtual
// call with a comparison of the vptr against that vtable's address.
//
// This pass is intended to be used during the regular and thin LTO pipelines.
// During regular LTO, the pass determines the best optimization for each
// virtual call and applies the resolutions directly to virtual calls that are
// eligible for virtual call optimization (i.e. calls that use either of the
// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics). During
// ThinLTO, the pass operates in two phases:
// - Export phase: this is run during the thin link over a single merged module
// that contains all vtables with !type metadata that participate in the link.
// The pass computes a resolution for each virtual call and stores it in the
// type identifier summary.
// - Import phase: this is run during the thin backends over the individual
// modules. The pass applies the resolutions previously computed during the
// import phase to each eligible virtual call.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndexYAML.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/PassSupport.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/Utils/Evaluator.h"
#include <algorithm>
#include <cstddef>
#include <map>
#include <set>
#include <string>
using namespace llvm;
using namespace wholeprogramdevirt;
#define DEBUG_TYPE "wholeprogramdevirt"
static cl::opt<PassSummaryAction> ClSummaryAction(
"wholeprogramdevirt-summary-action",
cl::desc("What to do with the summary when running this pass"),
cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"),
clEnumValN(PassSummaryAction::Import, "import",
"Import typeid resolutions from summary and globals"),
clEnumValN(PassSummaryAction::Export, "export",
"Export typeid resolutions to summary and globals")),
cl::Hidden);
static cl::opt<std::string> ClReadSummary(
"wholeprogramdevirt-read-summary",
cl::desc("Read summary from given YAML file before running pass"),
cl::Hidden);
static cl::opt<std::string> ClWriteSummary(
"wholeprogramdevirt-write-summary",
cl::desc("Write summary to given YAML file after running pass"),
cl::Hidden);
// Find the minimum offset that we may store a value of size Size bits at. If
// IsAfter is set, look for an offset before the object, otherwise look for an
// offset after the object.
uint64_t
wholeprogramdevirt::findLowestOffset(ArrayRef<VirtualCallTarget> Targets,
bool IsAfter, uint64_t Size) {
// Find a minimum offset taking into account only vtable sizes.
uint64_t MinByte = 0;
for (const VirtualCallTarget &Target : Targets) {
if (IsAfter)
MinByte = std::max(MinByte, Target.minAfterBytes());
else
MinByte = std::max(MinByte, Target.minBeforeBytes());
}
// Build a vector of arrays of bytes covering, for each target, a slice of the
// used region (see AccumBitVector::BytesUsed in
// llvm/Transforms/IPO/WholeProgramDevirt.h) starting at MinByte. Effectively,
// this aligns the used regions to start at MinByte.
//
// In this example, A, B and C are vtables, # is a byte already allocated for
// a virtual function pointer, AAAA... (etc.) are the used regions for the
// vtables and Offset(X) is the value computed for the Offset variable below
// for X.
//
// Offset(A)
// | |
// |MinByte
// A: ################AAAAAAAA|AAAAAAAA
// B: ########BBBBBBBBBBBBBBBB|BBBB
// C: ########################|CCCCCCCCCCCCCCCC
// | Offset(B) |
//
// This code produces the slices of A, B and C that appear after the divider
// at MinByte.
std::vector<ArrayRef<uint8_t>> Used;
for (const VirtualCallTarget &Target : Targets) {
ArrayRef<uint8_t> VTUsed = IsAfter ? Target.TM->Bits->After.BytesUsed
: Target.TM->Bits->Before.BytesUsed;
uint64_t Offset = IsAfter ? MinByte - Target.minAfterBytes()
: MinByte - Target.minBeforeBytes();
// Disregard used regions that are smaller than Offset. These are
// effectively all-free regions that do not need to be checked.
if (VTUsed.size() > Offset)
Used.push_back(VTUsed.slice(Offset));
}
if (Size == 1) {
// Find a free bit in each member of Used.
for (unsigned I = 0;; ++I) {
uint8_t BitsUsed = 0;
for (auto &&B : Used)
if (I < B.size())
BitsUsed |= B[I];
if (BitsUsed != 0xff)
return (MinByte + I) * 8 +
countTrailingZeros(uint8_t(~BitsUsed), ZB_Undefined);
}
} else {
// Find a free (Size/8) byte region in each member of Used.
// FIXME: see if alignment helps.
for (unsigned I = 0;; ++I) {
for (auto &&B : Used) {
unsigned Byte = 0;
while ((I + Byte) < B.size() && Byte < (Size / 8)) {
if (B[I + Byte])
goto NextI;
++Byte;
}
}
return (MinByte + I) * 8;
NextI:;
}
}
}
void wholeprogramdevirt::setBeforeReturnValues(
MutableArrayRef<VirtualCallTarget> Targets, uint64_t AllocBefore,
unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) {
if (BitWidth == 1)
OffsetByte = -(AllocBefore / 8 + 1);
else
OffsetByte = -((AllocBefore + 7) / 8 + (BitWidth + 7) / 8);
OffsetBit = AllocBefore % 8;
for (VirtualCallTarget &Target : Targets) {
if (BitWidth == 1)
Target.setBeforeBit(AllocBefore);
else
Target.setBeforeBytes(AllocBefore, (BitWidth + 7) / 8);
}
}
void wholeprogramdevirt::setAfterReturnValues(
MutableArrayRef<VirtualCallTarget> Targets, uint64_t AllocAfter,
unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) {
if (BitWidth == 1)
OffsetByte = AllocAfter / 8;
else
OffsetByte = (AllocAfter + 7) / 8;
OffsetBit = AllocAfter % 8;
for (VirtualCallTarget &Target : Targets) {
if (BitWidth == 1)
Target.setAfterBit(AllocAfter);
else
Target.setAfterBytes(AllocAfter, (BitWidth + 7) / 8);
}
}
VirtualCallTarget::VirtualCallTarget(Function *Fn, const TypeMemberInfo *TM)
: Fn(Fn), TM(TM),
IsBigEndian(Fn->getParent()->getDataLayout().isBigEndian()), WasDevirt(false) {}
namespace {
// A slot in a set of virtual tables. The TypeID identifies the set of virtual
// tables, and the ByteOffset is the offset in bytes from the address point to
// the virtual function pointer.
struct VTableSlot {
Metadata *TypeID;
uint64_t ByteOffset;
};
} // end anonymous namespace
namespace llvm {
template <> struct DenseMapInfo<VTableSlot> {
static VTableSlot getEmptyKey() {
return {DenseMapInfo<Metadata *>::getEmptyKey(),
DenseMapInfo<uint64_t>::getEmptyKey()};
}
static VTableSlot getTombstoneKey() {
return {DenseMapInfo<Metadata *>::getTombstoneKey(),
DenseMapInfo<uint64_t>::getTombstoneKey()};
}
static unsigned getHashValue(const VTableSlot &I) {
return DenseMapInfo<Metadata *>::getHashValue(I.TypeID) ^
DenseMapInfo<uint64_t>::getHashValue(I.ByteOffset);
}
static bool isEqual(const VTableSlot &LHS,
const VTableSlot &RHS) {
return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
}
};
} // end namespace llvm
namespace {
// A virtual call site. VTable is the loaded virtual table pointer, and CS is
// the indirect virtual call.
struct VirtualCallSite {
Value *VTable;
CallSite CS;
// If non-null, this field points to the associated unsafe use count stored in
// the DevirtModule::NumUnsafeUsesForTypeTest map below. See the description
// of that field for details.
unsigned *NumUnsafeUses;
void emitRemark(const Twine &OptName, const Twine &TargetName) {
Function *F = CS.getCaller();
emitOptimizationRemark(
F->getContext(), DEBUG_TYPE, *F,
CS.getInstruction()->getDebugLoc(),
OptName + ": devirtualized a call to " + TargetName);
}
void replaceAndErase(const Twine &OptName, const Twine &TargetName,
bool RemarksEnabled, Value *New) {
if (RemarksEnabled)
emitRemark(OptName, TargetName);
CS->replaceAllUsesWith(New);
if (auto II = dyn_cast<InvokeInst>(CS.getInstruction())) {
BranchInst::Create(II->getNormalDest(), CS.getInstruction());
II->getUnwindDest()->removePredecessor(II->getParent());
}
CS->eraseFromParent();
// This use is no longer unsafe.
if (NumUnsafeUses)
--*NumUnsafeUses;
}
};
// Call site information collected for a specific VTableSlot and possibly a list
// of constant integer arguments. The grouping by arguments is handled by the
// VTableSlotInfo class.
struct CallSiteInfo {
/// The set of call sites for this slot. Used during regular LTO and the
/// import phase of ThinLTO (as well as the export phase of ThinLTO for any
/// call sites that appear in the merged module itself); in each of these
/// cases we are directly operating on the call sites at the IR level.
std::vector<VirtualCallSite> CallSites;
// These fields are used during the export phase of ThinLTO and reflect
// information collected from function summaries.
/// Whether any function summary contains an llvm.assume(llvm.type.test) for
/// this slot.
bool SummaryHasTypeTestAssumeUsers;
/// CFI-specific: a vector containing the list of function summaries that use
/// the llvm.type.checked.load intrinsic and therefore will require
/// resolutions for llvm.type.test in order to implement CFI checks if
/// devirtualization was unsuccessful. If devirtualization was successful, the
/// pass will clear this vector by calling markDevirt(). If at the end of the
/// pass the vector is non-empty, we will need to add a use of llvm.type.test
/// to each of the function summaries in the vector.
std::vector<FunctionSummary *> SummaryTypeCheckedLoadUsers;
bool isExported() const {
return SummaryHasTypeTestAssumeUsers ||
!SummaryTypeCheckedLoadUsers.empty();
}
/// As explained in the comment for SummaryTypeCheckedLoadUsers.
void markDevirt() { SummaryTypeCheckedLoadUsers.clear(); }
};
// Call site information collected for a specific VTableSlot.
struct VTableSlotInfo {
// The set of call sites which do not have all constant integer arguments
// (excluding "this").
CallSiteInfo CSInfo;
// The set of call sites with all constant integer arguments (excluding
// "this"), grouped by argument list.
std::map<std::vector<uint64_t>, CallSiteInfo> ConstCSInfo;
void addCallSite(Value *VTable, CallSite CS, unsigned *NumUnsafeUses);
private:
CallSiteInfo &findCallSiteInfo(CallSite CS);
};
CallSiteInfo &VTableSlotInfo::findCallSiteInfo(CallSite CS) {
std::vector<uint64_t> Args;
auto *CI = dyn_cast<IntegerType>(CS.getType());
if (!CI || CI->getBitWidth() > 64 || CS.arg_empty())
return CSInfo;
for (auto &&Arg : make_range(CS.arg_begin() + 1, CS.arg_end())) {
auto *CI = dyn_cast<ConstantInt>(Arg);
if (!CI || CI->getBitWidth() > 64)
return CSInfo;
Args.push_back(CI->getZExtValue());
}
return ConstCSInfo[Args];
}
void VTableSlotInfo::addCallSite(Value *VTable, CallSite CS,
unsigned *NumUnsafeUses) {
findCallSiteInfo(CS).CallSites.push_back({VTable, CS, NumUnsafeUses});
}
struct DevirtModule {
Module &M;
function_ref<AAResults &(Function &)> AARGetter;
ModuleSummaryIndex *ExportSummary;
const ModuleSummaryIndex *ImportSummary;
IntegerType *Int8Ty;
PointerType *Int8PtrTy;
IntegerType *Int32Ty;
IntegerType *Int64Ty;
IntegerType *IntPtrTy;
bool RemarksEnabled;
MapVector<VTableSlot, VTableSlotInfo> CallSlots;
// This map keeps track of the number of "unsafe" uses of a loaded function
// pointer. The key is the associated llvm.type.test intrinsic call generated
// by this pass. An unsafe use is one that calls the loaded function pointer
// directly. Every time we eliminate an unsafe use (for example, by
// devirtualizing it or by applying virtual constant propagation), we
// decrement the value stored in this map. If a value reaches zero, we can
// eliminate the type check by RAUWing the associated llvm.type.test call with
// true.
std::map<CallInst *, unsigned> NumUnsafeUsesForTypeTest;
DevirtModule(Module &M, function_ref<AAResults &(Function &)> AARGetter,
ModuleSummaryIndex *ExportSummary,
const ModuleSummaryIndex *ImportSummary)
: M(M), AARGetter(AARGetter), ExportSummary(ExportSummary),
ImportSummary(ImportSummary), Int8Ty(Type::getInt8Ty(M.getContext())),
Int8PtrTy(Type::getInt8PtrTy(M.getContext())),
Int32Ty(Type::getInt32Ty(M.getContext())),
Int64Ty(Type::getInt64Ty(M.getContext())),
IntPtrTy(M.getDataLayout().getIntPtrType(M.getContext(), 0)),
RemarksEnabled(areRemarksEnabled()) {
assert(!(ExportSummary && ImportSummary));
}
bool areRemarksEnabled();
void scanTypeTestUsers(Function *TypeTestFunc, Function *AssumeFunc);
void scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc);
void buildTypeIdentifierMap(
std::vector<VTableBits> &Bits,
DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
Constant *getPointerAtOffset(Constant *I, uint64_t Offset);
bool
tryFindVirtualCallTargets(std::vector<VirtualCallTarget> &TargetsForSlot,
const std::set<TypeMemberInfo> &TypeMemberInfos,
uint64_t ByteOffset);
void applySingleImplDevirt(VTableSlotInfo &SlotInfo, Constant *TheFn,
bool &IsExported);
bool trySingleImplDevirt(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
VTableSlotInfo &SlotInfo,
WholeProgramDevirtResolution *Res);
bool tryEvaluateFunctionsWithArgs(
MutableArrayRef<VirtualCallTarget> TargetsForSlot,
ArrayRef<uint64_t> Args);
void applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
uint64_t TheRetVal);
bool tryUniformRetValOpt(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
CallSiteInfo &CSInfo,
WholeProgramDevirtResolution::ByArg *Res);
// Returns the global symbol name that is used to export information about the
// given vtable slot and list of arguments.
std::string getGlobalName(VTableSlot Slot, ArrayRef<uint64_t> Args,
StringRef Name);
// This function is called during the export phase to create a symbol
// definition containing information about the given vtable slot and list of
// arguments.
void exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name,
Constant *C);
// This function is called during the import phase to create a reference to
// the symbol definition created during the export phase.
Constant *importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
StringRef Name, unsigned AbsWidth = 0);
void applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName, bool IsOne,
Constant *UniqueMemberAddr);
bool tryUniqueRetValOpt(unsigned BitWidth,
MutableArrayRef<VirtualCallTarget> TargetsForSlot,
CallSiteInfo &CSInfo,
WholeProgramDevirtResolution::ByArg *Res,
VTableSlot Slot, ArrayRef<uint64_t> Args);
void applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
Constant *Byte, Constant *Bit);
bool tryVirtualConstProp(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
VTableSlotInfo &SlotInfo,
WholeProgramDevirtResolution *Res, VTableSlot Slot);
void rebuildGlobal(VTableBits &B);
// Apply the summary resolution for Slot to all virtual calls in SlotInfo.
void importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo);
// If we were able to eliminate all unsafe uses for a type checked load,
// eliminate the associated type tests by replacing them with true.
void removeRedundantTypeTests();
bool run();
// Lower the module using the action and summary passed as command line
// arguments. For testing purposes only.
static bool runForTesting(Module &M,
function_ref<AAResults &(Function &)> AARGetter);
};
struct WholeProgramDevirt : public ModulePass {
static char ID;
bool UseCommandLine = false;
ModuleSummaryIndex *ExportSummary;
const ModuleSummaryIndex *ImportSummary;
WholeProgramDevirt() : ModulePass(ID), UseCommandLine(true) {
initializeWholeProgramDevirtPass(*PassRegistry::getPassRegistry());
}
WholeProgramDevirt(ModuleSummaryIndex *ExportSummary,
const ModuleSummaryIndex *ImportSummary)
: ModulePass(ID), ExportSummary(ExportSummary),
ImportSummary(ImportSummary) {
initializeWholeProgramDevirtPass(*PassRegistry::getPassRegistry());
}
bool runOnModule(Module &M) override {
if (skipModule(M))
return false;
if (UseCommandLine)
return DevirtModule::runForTesting(M, LegacyAARGetter(*this));
return DevirtModule(M, LegacyAARGetter(*this), ExportSummary, ImportSummary)
.run();
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
};
} // end anonymous namespace
INITIALIZE_PASS_BEGIN(WholeProgramDevirt, "wholeprogramdevirt",
"Whole program devirtualization", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(WholeProgramDevirt, "wholeprogramdevirt",
"Whole program devirtualization", false, false)
char WholeProgramDevirt::ID = 0;
ModulePass *
llvm::createWholeProgramDevirtPass(ModuleSummaryIndex *ExportSummary,
const ModuleSummaryIndex *ImportSummary) {
return new WholeProgramDevirt(ExportSummary, ImportSummary);
}
PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
ModuleAnalysisManager &AM) {
auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
auto AARGetter = [&](Function &F) -> AAResults & {
return FAM.getResult<AAManager>(F);
};
if (!DevirtModule(M, AARGetter, nullptr, nullptr).run())
return PreservedAnalyses::all();
return PreservedAnalyses::none();
}
bool DevirtModule::runForTesting(
Module &M, function_ref<AAResults &(Function &)> AARGetter) {
ModuleSummaryIndex Summary;
// Handle the command-line summary arguments. This code is for testing
// purposes only, so we handle errors directly.
if (!ClReadSummary.empty()) {
ExitOnError ExitOnErr("-wholeprogramdevirt-read-summary: " + ClReadSummary +
": ");
auto ReadSummaryFile =
ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(ClReadSummary)));
yaml::Input In(ReadSummaryFile->getBuffer());
In >> Summary;
ExitOnErr(errorCodeToError(In.error()));
}
bool Changed =
DevirtModule(
M, AARGetter,
ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr,
ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr)
.run();
if (!ClWriteSummary.empty()) {
ExitOnError ExitOnErr(
"-wholeprogramdevirt-write-summary: " + ClWriteSummary + ": ");
std::error_code EC;
raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::F_Text);
ExitOnErr(errorCodeToError(EC));
yaml::Output Out(OS);
Out << Summary;
}
return Changed;
}
void DevirtModule::buildTypeIdentifierMap(
std::vector<VTableBits> &Bits,
DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
DenseMap<GlobalVariable *, VTableBits *> GVToBits;
Bits.reserve(M.getGlobalList().size());
SmallVector<MDNode *, 2> Types;
for (GlobalVariable &GV : M.globals()) {
Types.clear();
GV.getMetadata(LLVMContext::MD_type, Types);
if (Types.empty())
continue;
VTableBits *&BitsPtr = GVToBits[&GV];
if (!BitsPtr) {
Bits.emplace_back();
Bits.back().GV = &GV;
Bits.back().ObjectSize =
M.getDataLayout().getTypeAllocSize(GV.getInitializer()->getType());
BitsPtr = &Bits.back();
}
for (MDNode *Type : Types) {
auto TypeID = Type->getOperand(1).get();
uint64_t Offset =
cast<ConstantInt>(
cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
->getZExtValue();
TypeIdMap[TypeID].insert({BitsPtr, Offset});
}
}
}
Constant *DevirtModule::getPointerAtOffset(Constant *I, uint64_t Offset) {
if (I->getType()->isPointerTy()) {
if (Offset == 0)
return I;
return nullptr;
}
const DataLayout &DL = M.getDataLayout();
if (auto *C = dyn_cast<ConstantStruct>(I)) {
const StructLayout *SL = DL.getStructLayout(C->getType());
if (Offset >= SL->getSizeInBytes())
return nullptr;
unsigned Op = SL->getElementContainingOffset(Offset);
return getPointerAtOffset(cast<Constant>(I->getOperand(Op)),
Offset - SL->getElementOffset(Op));
}
if (auto *C = dyn_cast<ConstantArray>(I)) {
ArrayType *VTableTy = C->getType();
uint64_t ElemSize = DL.getTypeAllocSize(VTableTy->getElementType());
unsigned Op = Offset / ElemSize;
if (Op >= C->getNumOperands())
return nullptr;
return getPointerAtOffset(cast<Constant>(I->getOperand(Op)),
Offset % ElemSize);
}
return nullptr;
}
bool DevirtModule::tryFindVirtualCallTargets(
std::vector<VirtualCallTarget> &TargetsForSlot,
const std::set<TypeMemberInfo> &TypeMemberInfos, uint64_t ByteOffset) {
for (const TypeMemberInfo &TM : TypeMemberInfos) {
if (!TM.Bits->GV->isConstant())
return false;
Constant *Ptr = getPointerAtOffset(TM.Bits->GV->getInitializer(),
TM.Offset + ByteOffset);
if (!Ptr)
return false;
auto Fn = dyn_cast<Function>(Ptr->stripPointerCasts());
if (!Fn)
return false;
// We can disregard __cxa_pure_virtual as a possible call target, as
// calls to pure virtuals are UB.
if (Fn->getName() == "__cxa_pure_virtual")
continue;
TargetsForSlot.push_back({Fn, &TM});
}
// Give up if we couldn't find any targets.
return !TargetsForSlot.empty();
}
void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
Constant *TheFn, bool &IsExported) {
auto Apply = [&](CallSiteInfo &CSInfo) {
for (auto &&VCallSite : CSInfo.CallSites) {
if (RemarksEnabled)
VCallSite.emitRemark("single-impl", TheFn->getName());
VCallSite.CS.setCalledFunction(ConstantExpr::getBitCast(
TheFn, VCallSite.CS.getCalledValue()->getType()));
// This use is no longer unsafe.
if (VCallSite.NumUnsafeUses)
--*VCallSite.NumUnsafeUses;
}
if (CSInfo.isExported()) {
IsExported = true;
CSInfo.markDevirt();
}
};
Apply(SlotInfo.CSInfo);
for (auto &P : SlotInfo.ConstCSInfo)
Apply(P.second);
}
bool DevirtModule::trySingleImplDevirt(
MutableArrayRef<VirtualCallTarget> TargetsForSlot,
VTableSlotInfo &SlotInfo, WholeProgramDevirtResolution *Res) {
// See if the program contains a single implementation of this virtual
// function.
Function *TheFn = TargetsForSlot[0].Fn;
for (auto &&Target : TargetsForSlot)
if (TheFn != Target.Fn)
return false;
// If so, update each call site to call that implementation directly.
if (RemarksEnabled)
TargetsForSlot[0].WasDevirt = true;
bool IsExported = false;
applySingleImplDevirt(SlotInfo, TheFn, IsExported);
if (!IsExported)
return false;
// If the only implementation has local linkage, we must promote to external
// to make it visible to thin LTO objects. We can only get here during the
// ThinLTO export phase.
if (TheFn->hasLocalLinkage()) {
TheFn->setLinkage(GlobalValue::ExternalLinkage);
TheFn->setVisibility(GlobalValue::HiddenVisibility);
TheFn->setName(TheFn->getName() + "$merged");
}
Res->TheKind = WholeProgramDevirtResolution::SingleImpl;
Res->SingleImplName = TheFn->getName();
return true;
}
bool DevirtModule::tryEvaluateFunctionsWithArgs(
MutableArrayRef<VirtualCallTarget> TargetsForSlot,
ArrayRef<uint64_t> Args) {
// Evaluate each function and store the result in each target's RetVal
// field.
for (VirtualCallTarget &Target : TargetsForSlot) {
if (Target.Fn->arg_size() != Args.size() + 1)
return false;
Evaluator Eval(M.getDataLayout(), nullptr);
SmallVector<Constant *, 2> EvalArgs;
EvalArgs.push_back(
Constant::getNullValue(Target.Fn->getFunctionType()->getParamType(0)));
for (unsigned I = 0; I != Args.size(); ++I) {
auto *ArgTy = dyn_cast<IntegerType>(
Target.Fn->getFunctionType()->getParamType(I + 1));
if (!ArgTy)
return false;
EvalArgs.push_back(ConstantInt::get(ArgTy, Args[I]));
}
Constant *RetVal;
if (!Eval.EvaluateFunction(Target.Fn, RetVal, EvalArgs) ||
!isa<ConstantInt>(RetVal))
return false;
Target.RetVal = cast<ConstantInt>(RetVal)->getZExtValue();
}
return true;
}
void DevirtModule::applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
uint64_t TheRetVal) {
for (auto Call : CSInfo.CallSites)
Call.replaceAndErase(
"uniform-ret-val", FnName, RemarksEnabled,
ConstantInt::get(cast<IntegerType>(Call.CS.getType()), TheRetVal));
CSInfo.markDevirt();
}
bool DevirtModule::tryUniformRetValOpt(
MutableArrayRef<VirtualCallTarget> TargetsForSlot, CallSiteInfo &CSInfo,
WholeProgramDevirtResolution::ByArg *Res) {
// Uniform return value optimization. If all functions return the same
// constant, replace all calls with that constant.
uint64_t TheRetVal = TargetsForSlot[0].RetVal;
for (const VirtualCallTarget &Target : TargetsForSlot)
if (Target.RetVal != TheRetVal)
return false;
if (CSInfo.isExported()) {
Res->TheKind = WholeProgramDevirtResolution::ByArg::UniformRetVal;
Res->Info = TheRetVal;
}
applyUniformRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), TheRetVal);
if (RemarksEnabled)
for (auto &&Target : TargetsForSlot)
Target.WasDevirt = true;
return true;
}
std::string DevirtModule::getGlobalName(VTableSlot Slot,
ArrayRef<uint64_t> Args,
StringRef Name) {
std::string FullName = "__typeid_";
raw_string_ostream OS(FullName);
OS << cast<MDString>(Slot.TypeID)->getString() << '_' << Slot.ByteOffset;
for (uint64_t Arg : Args)
OS << '_' << Arg;
OS << '_' << Name;
return OS.str();
}
void DevirtModule::exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
StringRef Name, Constant *C) {
GlobalAlias *GA = GlobalAlias::create(Int8Ty, 0, GlobalValue::ExternalLinkage,
getGlobalName(Slot, Args, Name), C, &M);
GA->setVisibility(GlobalValue::HiddenVisibility);
}
Constant *DevirtModule::importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
StringRef Name, unsigned AbsWidth) {
Constant *C = M.getOrInsertGlobal(getGlobalName(Slot, Args, Name), Int8Ty);
auto *GV = dyn_cast<GlobalVariable>(C);
// We only need to set metadata if the global is newly created, in which
// case it would not have hidden visibility.
if (!GV || GV->getVisibility() == GlobalValue::HiddenVisibility)
return C;
GV->setVisibility(GlobalValue::HiddenVisibility);
auto SetAbsRange = [&](uint64_t Min, uint64_t Max) {
auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Min));
auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Max));
GV->setMetadata(LLVMContext::MD_absolute_symbol,
MDNode::get(M.getContext(), {MinC, MaxC}));
};
if (AbsWidth == IntPtrTy->getBitWidth())
SetAbsRange(~0ull, ~0ull); // Full set.
else if (AbsWidth)
SetAbsRange(0, 1ull << AbsWidth);
return GV;
}
void DevirtModule::applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
bool IsOne,
Constant *UniqueMemberAddr) {
for (auto &&Call : CSInfo.CallSites) {
IRBuilder<> B(Call.CS.getInstruction());
Value *Cmp = B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
Call.VTable, UniqueMemberAddr);
Cmp = B.CreateZExt(Cmp, Call.CS->getType());
Call.replaceAndErase("unique-ret-val", FnName, RemarksEnabled, Cmp);
}
CSInfo.markDevirt();
}
bool DevirtModule::tryUniqueRetValOpt(
unsigned BitWidth, MutableArrayRef<VirtualCallTarget> TargetsForSlot,
CallSiteInfo &CSInfo, WholeProgramDevirtResolution::ByArg *Res,
VTableSlot Slot, ArrayRef<uint64_t> Args) {
// IsOne controls whether we look for a 0 or a 1.
auto tryUniqueRetValOptFor = [&](bool IsOne) {
const TypeMemberInfo *UniqueMember = nullptr;
for (const VirtualCallTarget &Target : TargetsForSlot) {
if (Target.RetVal == (IsOne ? 1 : 0)) {
if (UniqueMember)
return false;
UniqueMember = Target.TM;
}
}
// We should have found a unique member or bailed out by now. We already
// checked for a uniform return value in tryUniformRetValOpt.
assert(UniqueMember);
Constant *UniqueMemberAddr =
ConstantExpr::getBitCast(UniqueMember->Bits->GV, Int8PtrTy);
UniqueMemberAddr = ConstantExpr::getGetElementPtr(
Int8Ty, UniqueMemberAddr,
ConstantInt::get(Int64Ty, UniqueMember->Offset));
if (CSInfo.isExported()) {
Res->TheKind = WholeProgramDevirtResolution::ByArg::UniqueRetVal;
Res->Info = IsOne;
exportGlobal(Slot, Args, "unique_member", UniqueMemberAddr);
}
// Replace each call with the comparison.
applyUniqueRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), IsOne,
UniqueMemberAddr);
// Update devirtualization statistics for targets.
if (RemarksEnabled)
for (auto &&Target : TargetsForSlot)
Target.WasDevirt = true;
return true;
};
if (BitWidth == 1) {
if (tryUniqueRetValOptFor(true))
return true;
if (tryUniqueRetValOptFor(false))
return true;
}
return false;
}
void DevirtModule::applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
Constant *Byte, Constant *Bit) {
for (auto Call : CSInfo.CallSites) {
auto *RetType = cast<IntegerType>(Call.CS.getType());
IRBuilder<> B(Call.CS.getInstruction());
Value *Addr = B.CreateGEP(Int8Ty, Call.VTable, Byte);
if (RetType->getBitWidth() == 1) {
Value *Bits = B.CreateLoad(Addr);
Value *BitsAndBit = B.CreateAnd(Bits, Bit);
auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0));
Call.replaceAndErase("virtual-const-prop-1-bit", FnName, RemarksEnabled,
IsBitSet);
} else {
Value *ValAddr = B.CreateBitCast(Addr, RetType->getPointerTo());
Value *Val = B.CreateLoad(RetType, ValAddr);
Call.replaceAndErase("virtual-const-prop", FnName, RemarksEnabled, Val);
}
}
CSInfo.markDevirt();
}
bool DevirtModule::tryVirtualConstProp(
MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
WholeProgramDevirtResolution *Res, VTableSlot Slot) {
// This only works if the function returns an integer.
auto RetType = dyn_cast<IntegerType>(TargetsForSlot[0].Fn->getReturnType());
if (!RetType)
return false;
unsigned BitWidth = RetType->getBitWidth();
if (BitWidth > 64)
return false;
// Make sure that each function is defined, does not access memory, takes at
// least one argument, does not use its first argument (which we assume is
// 'this'), and has the same return type.
//
// Note that we test whether this copy of the function is readnone, rather
// than testing function attributes, which must hold for any copy of the
// function, even a less optimized version substituted at link time. This is
// sound because the virtual constant propagation optimizations effectively
// inline all implementations of the virtual function into each call site,
// rather than using function attributes to perform local optimization.
for (VirtualCallTarget &Target : TargetsForSlot) {
if (Target.Fn->isDeclaration() ||
computeFunctionBodyMemoryAccess(*Target.Fn, AARGetter(*Target.Fn)) !=
MAK_ReadNone ||
Target.Fn->arg_empty() || !Target.Fn->arg_begin()->use_empty() ||
Target.Fn->getReturnType() != RetType)
return false;
}
for (auto &&CSByConstantArg : SlotInfo.ConstCSInfo) {
if (!tryEvaluateFunctionsWithArgs(TargetsForSlot, CSByConstantArg.first))
continue;
WholeProgramDevirtResolution::ByArg *ResByArg = nullptr;
if (Res)
ResByArg = &Res->ResByArg[CSByConstantArg.first];
if (tryUniformRetValOpt(TargetsForSlot, CSByConstantArg.second, ResByArg))
continue;
if (tryUniqueRetValOpt(BitWidth, TargetsForSlot, CSByConstantArg.second,
ResByArg, Slot, CSByConstantArg.first))
continue;
// Find an allocation offset in bits in all vtables associated with the
// type.
uint64_t AllocBefore =
findLowestOffset(TargetsForSlot, /*IsAfter=*/false, BitWidth);
uint64_t AllocAfter =
findLowestOffset(TargetsForSlot, /*IsAfter=*/true, BitWidth);
// Calculate the total amount of padding needed to store a value at both
// ends of the object.
uint64_t TotalPaddingBefore = 0, TotalPaddingAfter = 0;
for (auto &&Target : TargetsForSlot) {
TotalPaddingBefore += std::max<int64_t>(
(AllocBefore + 7) / 8 - Target.allocatedBeforeBytes() - 1, 0);
TotalPaddingAfter += std::max<int64_t>(
(AllocAfter + 7) / 8 - Target.allocatedAfterBytes() - 1, 0);
}
// If the amount of padding is too large, give up.
// FIXME: do something smarter here.
if (std::min(TotalPaddingBefore, TotalPaddingAfter) > 128)
continue;
// Calculate the offset to the value as a (possibly negative) byte offset
// and (if applicable) a bit offset, and store the values in the targets.
int64_t OffsetByte;
uint64_t OffsetBit;
if (TotalPaddingBefore <= TotalPaddingAfter)
setBeforeReturnValues(TargetsForSlot, AllocBefore, BitWidth, OffsetByte,
OffsetBit);
else
setAfterReturnValues(TargetsForSlot, AllocAfter, BitWidth, OffsetByte,
OffsetBit);
if (RemarksEnabled)
for (auto &&Target : TargetsForSlot)
Target.WasDevirt = true;
Constant *ByteConst = ConstantInt::get(Int32Ty, OffsetByte);
Constant *BitConst = ConstantInt::get(Int8Ty, 1ULL << OffsetBit);
if (CSByConstantArg.second.isExported()) {
ResByArg->TheKind = WholeProgramDevirtResolution::ByArg::VirtualConstProp;
exportGlobal(Slot, CSByConstantArg.first, "byte",
ConstantExpr::getIntToPtr(ByteConst, Int8PtrTy));
exportGlobal(Slot, CSByConstantArg.first, "bit",
ConstantExpr::getIntToPtr(BitConst, Int8PtrTy));
}
// Rewrite each call to a load from OffsetByte/OffsetBit.
applyVirtualConstProp(CSByConstantArg.second,
TargetsForSlot[0].Fn->getName(), ByteConst, BitConst);
}
return true;
}
void DevirtModule::rebuildGlobal(VTableBits &B) {
if (B.Before.Bytes.empty() && B.After.Bytes.empty())
return;
// Align each byte array to pointer width.
unsigned PointerSize = M.getDataLayout().getPointerSize();
B.Before.Bytes.resize(alignTo(B.Before.Bytes.size(), PointerSize));
B.After.Bytes.resize(alignTo(B.After.Bytes.size(), PointerSize));
// Before was stored in reverse order; flip it now.
for (size_t I = 0, Size = B.Before.Bytes.size(); I != Size / 2; ++I)
std::swap(B.Before.Bytes[I], B.Before.Bytes[Size - 1 - I]);
// Build an anonymous global containing the before bytes, followed by the
// original initializer, followed by the after bytes.
auto NewInit = ConstantStruct::getAnon(
{ConstantDataArray::get(M.getContext(), B.Before.Bytes),
B.GV->getInitializer(),
ConstantDataArray::get(M.getContext(), B.After.Bytes)});
auto NewGV =
new GlobalVariable(M, NewInit->getType(), B.GV->isConstant(),
GlobalVariable::PrivateLinkage, NewInit, "", B.GV);
NewGV->setSection(B.GV->getSection());
NewGV->setComdat(B.GV->getComdat());
// Copy the original vtable's metadata to the anonymous global, adjusting
// offsets as required.
NewGV->copyMetadata(B.GV, B.Before.Bytes.size());
// Build an alias named after the original global, pointing at the second
// element (the original initializer).
auto Alias = GlobalAlias::create(
B.GV->getInitializer()->getType(), 0, B.GV->getLinkage(), "",
ConstantExpr::getGetElementPtr(
NewInit->getType(), NewGV,
ArrayRef<Constant *>{ConstantInt::get(Int32Ty, 0),
ConstantInt::get(Int32Ty, 1)}),
&M);
Alias->setVisibility(B.GV->getVisibility());
Alias->takeName(B.GV);
B.GV->replaceAllUsesWith(Alias);
B.GV->eraseFromParent();
}
bool DevirtModule::areRemarksEnabled() {
const auto &FL = M.getFunctionList();
if (FL.empty())
return false;
const Function &Fn = FL.front();
const auto &BBL = Fn.getBasicBlockList();
if (BBL.empty())
return false;
auto DI = OptimizationRemark(DEBUG_TYPE, "", DebugLoc(), &BBL.front());
return DI.isEnabled();
}
void DevirtModule::scanTypeTestUsers(Function *TypeTestFunc,
Function *AssumeFunc) {
// Find all virtual calls via a virtual table pointer %p under an assumption
// of the form llvm.assume(llvm.type.test(%p, %md)). This indicates that %p
// points to a member of the type identifier %md. Group calls by (type ID,
// offset) pair (effectively the identity of the virtual function) and store
// to CallSlots.
DenseSet<Value *> SeenPtrs;
for (auto I = TypeTestFunc->use_begin(), E = TypeTestFunc->use_end();
I != E;) {
auto CI = dyn_cast<CallInst>(I->getUser());
++I;
if (!CI)
continue;
// Search for virtual calls based on %p and add them to DevirtCalls.
SmallVector<DevirtCallSite, 1> DevirtCalls;
SmallVector<CallInst *, 1> Assumes;
findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI);
// If we found any, add them to CallSlots. Only do this if we haven't seen
// the vtable pointer before, as it may have been CSE'd with pointers from
// other call sites, and we don't want to process call sites multiple times.
if (!Assumes.empty()) {
Metadata *TypeId =
cast<MetadataAsValue>(CI->getArgOperand(1))->getMetadata();
Value *Ptr = CI->getArgOperand(0)->stripPointerCasts();
if (SeenPtrs.insert(Ptr).second) {
for (DevirtCallSite Call : DevirtCalls) {
CallSlots[{TypeId, Call.Offset}].addCallSite(CI->getArgOperand(0),
Call.CS, nullptr);
}
}
}
// We no longer need the assumes or the type test.
for (auto Assume : Assumes)
Assume->eraseFromParent();
// We can't use RecursivelyDeleteTriviallyDeadInstructions here because we
// may use the vtable argument later.
if (CI->use_empty())
CI->eraseFromParent();
}
}
void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) {
Function *TypeTestFunc = Intrinsic::getDeclaration(&M, Intrinsic::type_test);
for (auto I = TypeCheckedLoadFunc->use_begin(),
E = TypeCheckedLoadFunc->use_end();
I != E;) {
auto CI = dyn_cast<CallInst>(I->getUser());
++I;
if (!CI)
continue;
Value *Ptr = CI->getArgOperand(0);
Value *Offset = CI->getArgOperand(1);
Value *TypeIdValue = CI->getArgOperand(2);
Metadata *TypeId = cast<MetadataAsValue>(TypeIdValue)->getMetadata();
SmallVector<DevirtCallSite, 1> DevirtCalls;
SmallVector<Instruction *, 1> LoadedPtrs;
SmallVector<Instruction *, 1> Preds;
bool HasNonCallUses = false;
findDevirtualizableCallsForTypeCheckedLoad(DevirtCalls, LoadedPtrs, Preds,
HasNonCallUses, CI);
// Start by generating "pessimistic" code that explicitly loads the function
// pointer from the vtable and performs the type check. If possible, we will
// eliminate the load and the type check later.
// If possible, only generate the load at the point where it is used.
// This helps avoid unnecessary spills.
IRBuilder<> LoadB(
(LoadedPtrs.size() == 1 && !HasNonCallUses) ? LoadedPtrs[0] : CI);
Value *GEP = LoadB.CreateGEP(Int8Ty, Ptr, Offset);
Value *GEPPtr = LoadB.CreateBitCast(GEP, PointerType::getUnqual(Int8PtrTy));
Value *LoadedValue = LoadB.CreateLoad(Int8PtrTy, GEPPtr);
for (Instruction *LoadedPtr : LoadedPtrs) {
LoadedPtr->replaceAllUsesWith(LoadedValue);
LoadedPtr->eraseFromParent();
}
// Likewise for the type test.
IRBuilder<> CallB((Preds.size() == 1 && !HasNonCallUses) ? Preds[0] : CI);
CallInst *TypeTestCall = CallB.CreateCall(TypeTestFunc, {Ptr, TypeIdValue});
for (Instruction *Pred : Preds) {
Pred->replaceAllUsesWith(TypeTestCall);
Pred->eraseFromParent();
}
// We have already erased any extractvalue instructions that refer to the
// intrinsic call, but the intrinsic may have other non-extractvalue uses
// (although this is unlikely). In that case, explicitly build a pair and
// RAUW it.
if (!CI->use_empty()) {
Value *Pair = UndefValue::get(CI->getType());
IRBuilder<> B(CI);
Pair = B.CreateInsertValue(Pair, LoadedValue, {0});
Pair = B.CreateInsertValue(Pair, TypeTestCall, {1});
CI->replaceAllUsesWith(Pair);
}
// The number of unsafe uses is initially the number of uses.
auto &NumUnsafeUses = NumUnsafeUsesForTypeTest[TypeTestCall];
NumUnsafeUses = DevirtCalls.size();
// If the function pointer has a non-call user, we cannot eliminate the type
// check, as one of those users may eventually call the pointer. Increment
// the unsafe use count to make sure it cannot reach zero.
if (HasNonCallUses)
++NumUnsafeUses;
for (DevirtCallSite Call : DevirtCalls) {
CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CS,
&NumUnsafeUses);
}
CI->eraseFromParent();
}
}
void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
const TypeIdSummary *TidSummary =
ImportSummary->getTypeIdSummary(cast<MDString>(Slot.TypeID)->getString());
if (!TidSummary)
return;
auto ResI = TidSummary->WPDRes.find(Slot.ByteOffset);
if (ResI == TidSummary->WPDRes.end())
return;
const WholeProgramDevirtResolution &Res = ResI->second;
if (Res.TheKind == WholeProgramDevirtResolution::SingleImpl) {
// The type of the function in the declaration is irrelevant because every
// call site will cast it to the correct type.
auto *SingleImpl = M.getOrInsertFunction(
Res.SingleImplName, Type::getVoidTy(M.getContext()));
// This is the import phase so we should not be exporting anything.
bool IsExported = false;
applySingleImplDevirt(SlotInfo, SingleImpl, IsExported);
assert(!IsExported);
}
for (auto &CSByConstantArg : SlotInfo.ConstCSInfo) {
auto I = Res.ResByArg.find(CSByConstantArg.first);
if (I == Res.ResByArg.end())
continue;
auto &ResByArg = I->second;
// FIXME: We should figure out what to do about the "function name" argument
// to the apply* functions, as the function names are unavailable during the
// importing phase. For now we just pass the empty string. This does not
// impact correctness because the function names are just used for remarks.
switch (ResByArg.TheKind) {
case WholeProgramDevirtResolution::ByArg::UniformRetVal:
applyUniformRetValOpt(CSByConstantArg.second, "", ResByArg.Info);
break;
case WholeProgramDevirtResolution::ByArg::UniqueRetVal: {
Constant *UniqueMemberAddr =
importGlobal(Slot, CSByConstantArg.first, "unique_member");
applyUniqueRetValOpt(CSByConstantArg.second, "", ResByArg.Info,
UniqueMemberAddr);
break;
}
case WholeProgramDevirtResolution::ByArg::VirtualConstProp: {
Constant *Byte = importGlobal(Slot, CSByConstantArg.first, "byte", 32);
Byte = ConstantExpr::getPtrToInt(Byte, Int32Ty);
Constant *Bit = importGlobal(Slot, CSByConstantArg.first, "bit", 8);
Bit = ConstantExpr::getPtrToInt(Bit, Int8Ty);
applyVirtualConstProp(CSByConstantArg.second, "", Byte, Bit);
}
default:
break;
}
}
}
void DevirtModule::removeRedundantTypeTests() {
auto True = ConstantInt::getTrue(M.getContext());
for (auto &&U : NumUnsafeUsesForTypeTest) {
if (U.second == 0) {
U.first->replaceAllUsesWith(True);
U.first->eraseFromParent();
}
}
}
bool DevirtModule::run() {
Function *TypeTestFunc =
M.getFunction(Intrinsic::getName(Intrinsic::type_test));
Function *TypeCheckedLoadFunc =
M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
Function *AssumeFunc = M.getFunction(Intrinsic::getName(Intrinsic::assume));
// Normally if there are no users of the devirtualization intrinsics in the
// module, this pass has nothing to do. But if we are exporting, we also need
// to handle any users that appear only in the function summaries.
if (!ExportSummary &&
(!TypeTestFunc || TypeTestFunc->use_empty() || !AssumeFunc ||
AssumeFunc->use_empty()) &&
(!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()))
return false;
if (TypeTestFunc && AssumeFunc)
scanTypeTestUsers(TypeTestFunc, AssumeFunc);
if (TypeCheckedLoadFunc)
scanTypeCheckedLoadUsers(TypeCheckedLoadFunc);
if (ImportSummary) {
for (auto &S : CallSlots)
importResolution(S.first, S.second);
removeRedundantTypeTests();
// The rest of the code is only necessary when exporting or during regular
// LTO, so we are done.
return true;
}
// Rebuild type metadata into a map for easy lookup.
std::vector<VTableBits> Bits;
DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap;
buildTypeIdentifierMap(Bits, TypeIdMap);
if (TypeIdMap.empty())
return true;
// Collect information from summary about which calls to try to devirtualize.
if (ExportSummary) {
DenseMap<GlobalValue::GUID, TinyPtrVector<Metadata *>> MetadataByGUID;
for (auto &P : TypeIdMap) {
if (auto *TypeId = dyn_cast<MDString>(P.first))
MetadataByGUID[GlobalValue::getGUID(TypeId->getString())].push_back(
TypeId);
}
for (auto &P : *ExportSummary) {
for (auto &S : P.second) {
auto *FS = dyn_cast<FunctionSummary>(S.get());
if (!FS)
continue;
// FIXME: Only add live functions.
for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
for (Metadata *MD : MetadataByGUID[VF.GUID]) {
CallSlots[{MD, VF.Offset}].CSInfo.SummaryHasTypeTestAssumeUsers =
true;
}
}
for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
for (Metadata *MD : MetadataByGUID[VF.GUID]) {
CallSlots[{MD, VF.Offset}]
.CSInfo.SummaryTypeCheckedLoadUsers.push_back(FS);
}
}
for (const FunctionSummary::ConstVCall &VC :
FS->type_test_assume_const_vcalls()) {
for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
CallSlots[{MD, VC.VFunc.Offset}]
.ConstCSInfo[VC.Args]
.SummaryHasTypeTestAssumeUsers = true;
}
}
for (const FunctionSummary::ConstVCall &VC :
FS->type_checked_load_const_vcalls()) {
for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
CallSlots[{MD, VC.VFunc.Offset}]
.ConstCSInfo[VC.Args]
.SummaryTypeCheckedLoadUsers.push_back(FS);
}
}
}
}
}
// For each (type, offset) pair:
bool DidVirtualConstProp = false;
std::map<std::string, Function*> DevirtTargets;
for (auto &S : CallSlots) {
// Search each of the members of the type identifier for the virtual
// function implementation at offset S.first.ByteOffset, and add to
// TargetsForSlot.
std::vector<VirtualCallTarget> TargetsForSlot;
if (tryFindVirtualCallTargets(TargetsForSlot, TypeIdMap[S.first.TypeID],
S.first.ByteOffset)) {
WholeProgramDevirtResolution *Res = nullptr;
if (ExportSummary && isa<MDString>(S.first.TypeID))
Res = &ExportSummary
->getOrInsertTypeIdSummary(
cast<MDString>(S.first.TypeID)->getString())
.WPDRes[S.first.ByteOffset];
if (!trySingleImplDevirt(TargetsForSlot, S.second, Res) &&
tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first))
DidVirtualConstProp = true;
// Collect functions devirtualized at least for one call site for stats.
if (RemarksEnabled)
for (const auto &T : TargetsForSlot)
if (T.WasDevirt)
DevirtTargets[T.Fn->getName()] = T.Fn;
}
// CFI-specific: if we are exporting and any llvm.type.checked.load
// intrinsics were *not* devirtualized, we need to add the resulting
// llvm.type.test intrinsics to the function summaries so that the
// LowerTypeTests pass will export them.
if (ExportSummary && isa<MDString>(S.first.TypeID)) {
auto GUID =
GlobalValue::getGUID(cast<MDString>(S.first.TypeID)->getString());
for (auto FS : S.second.CSInfo.SummaryTypeCheckedLoadUsers)
FS->addTypeTest(GUID);
for (auto &CCS : S.second.ConstCSInfo)
for (auto FS : CCS.second.SummaryTypeCheckedLoadUsers)
FS->addTypeTest(GUID);
}
}
if (RemarksEnabled) {
// Generate remarks for each devirtualized function.
for (const auto &DT : DevirtTargets) {
Function *F = DT.second;
DISubprogram *SP = F->getSubprogram();
emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, SP,
Twine("devirtualized ") + F->getName());
}
}
removeRedundantTypeTests();
// Rebuild each global we touched as part of virtual constant propagation to
// include the before and after bytes.
if (DidVirtualConstProp)
for (VTableBits &B : Bits)
rebuildGlobal(B);
return true;
}