
[Attributor] Reorganize AAHeapToStack

In order to simplify future extensions, e.g., the merge of
AAHeapToShared into AAHeapToStack, we reorganize AAHeapToStack and the
state we keep for each malloc-like call. The result is also less
confusing as we only track malloc-like calls, not all calls. Further,
we only perform the updates necessary for a malloc-like call to argue
it can go to the stack, e.g., we won't check all uses once we have
moved on to the "must-be-freed" argument.

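As a rough illustration of the new per-allocation bookkeeping (a minimal
sketch, not the patch's code; the step/UsesOk/FreeOk names are invented
here), each tracked call only ever moves forward through its status:

enum class Status { StackDueToUse, StackDueToFree, Invalid };

Status step(Status S, bool UsesOk, bool FreeOk) {
  switch (S) {
  case Status::StackDueToUse:
    if (UsesOk)
      return S;                 // keep arguing via the uses
    S = Status::StackDueToFree; // uses failed, try the "must-be-freed" route
    [[fallthrough]];
  case Status::StackDueToFree:
    return FreeOk ? S : Status::Invalid; // once invalid, never revisited
  case Status::Invalid:
    return S;
  }
  return Status::Invalid; // unreachable; silences -Wreturn-type
}
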
This patch also uses Attributor helpers to simplify the allocated size,
the alignment, and the potentially freed objects.
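
For example, the new getSize helper combines calloc's two operands with
an overflow check once both have been simplified to constants; a minimal
sketch assuming only LLVM's APInt/Optional APIs (callocSize is a
hypothetical name, not part of the patch):

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/Optional.h"

// Sketch: fold calloc's (assumed-constant) element count and size into one
// allocation size, bailing out on unsigned multiplication overflow, as
// getSize() does for AllocationKind::CALLOC in the diff below.
static llvm::Optional<llvm::APInt> callocSize(const llvm::APInt &Num,
                                              const llvm::APInt &ElemSize) {
  bool Overflow = false;
  llvm::APInt Total = ElemSize.umul_ov(Num, Overflow);
  if (Overflow)
    return llvm::None; // no provable bound, keep the allocation on the heap
  return Total;
}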

Overall, this is mostly a reorganization and only the use of the
optimistic helpers should change (=improve) the capabilities a bit.

Differential Revision: https://reviews.llvm.org/D104993
Johannes Doerfert 2021-06-25 18:24:01 -05:00
parent 330a2a1821
commit 1e74d94d7c
7 changed files with 387 additions and 186 deletions


@@ -1729,17 +1729,21 @@ public:
bool checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
const AbstractAttribute &QueryingAA,
const ArrayRef<unsigned> &Opcodes,
bool CheckBBLivenessOnly = false);
bool CheckBBLivenessOnly = false,
bool CheckPotentiallyDead = false);
/// Check \p Pred on all call-like instructions (=CallBased derived).
///
/// See checkForAllInstructions(...) for more information.
bool checkForAllCallLikeInstructions(function_ref<bool(Instruction &)> Pred,
const AbstractAttribute &QueryingAA) {
const AbstractAttribute &QueryingAA,
bool CheckBBLivenessOnly = false,
bool CheckPotentiallyDead = false) {
return checkForAllInstructions(Pred, QueryingAA,
{(unsigned)Instruction::Invoke,
(unsigned)Instruction::CallBr,
(unsigned)Instruction::Call});
(unsigned)Instruction::Call},
CheckBBLivenessOnly, CheckPotentiallyDead);
}
/// Check \p Pred on all Read/Write instructions.
@@ -3493,9 +3497,6 @@ struct AAHeapToStack : public StateWrapper<BooleanState, AbstractAttribute> {
/// Returns true if HeapToStack conversion is assumed to be possible.
virtual bool isAssumedHeapToStack(CallBase &CB) const = 0;
/// Returns true if HeapToStack conversion is known to be possible.
virtual bool isKnownHeapToStack(CallBase &CB) const = 0;
/// Create an abstract attribute view for the position \p IRP.
static AAHeapToStack &createForPosition(const IRPosition &IRP, Attributor &A);


@@ -1023,7 +1023,7 @@ static bool checkForAllInstructionsImpl(
Attributor *A, InformationCache::OpcodeInstMapTy &OpcodeInstMap,
function_ref<bool(Instruction &)> Pred, const AbstractAttribute *QueryingAA,
const AAIsDead *LivenessAA, const ArrayRef<unsigned> &Opcodes,
bool CheckBBLivenessOnly = false) {
bool CheckBBLivenessOnly = false, bool CheckPotentiallyDead = false) {
for (unsigned Opcode : Opcodes) {
// Check if we have instructions with this opcode at all first.
auto *Insts = OpcodeInstMap.lookup(Opcode);
@@ -1032,8 +1032,9 @@ static bool checkForAllInstructionsImpl(
for (Instruction *I : *Insts) {
// Skip dead instructions.
if (A && A->isAssumedDead(IRPosition::value(*I), QueryingAA, LivenessAA,
CheckBBLivenessOnly))
if (A && !CheckPotentiallyDead &&
A->isAssumedDead(IRPosition::value(*I), QueryingAA, LivenessAA,
CheckBBLivenessOnly))
continue;
if (!Pred(*I))
@@ -1046,7 +1047,8 @@ static bool checkForAllInstructionsImpl(
bool Attributor::checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
const AbstractAttribute &QueryingAA,
const ArrayRef<unsigned> &Opcodes,
bool CheckBBLivenessOnly) {
bool CheckBBLivenessOnly,
bool CheckPotentiallyDead) {
const IRPosition &IRP = QueryingAA.getIRPosition();
// Since we need to provide instructions we have to have an exact definition.
@@ -1057,14 +1059,15 @@ bool Attributor::checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
// TODO: use the function scope once we have call site AAReturnedValues.
const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction);
const auto *LivenessAA =
CheckBBLivenessOnly
(CheckBBLivenessOnly || CheckPotentiallyDead)
? nullptr
: &(getAAFor<AAIsDead>(QueryingAA, QueryIRP, DepClassTy::NONE));
auto &OpcodeInstMap =
InfoCache.getOpcodeInstMapForFunction(*AssociatedFunction);
if (!checkForAllInstructionsImpl(this, OpcodeInstMap, Pred, &QueryingAA,
LivenessAA, Opcodes, CheckBBLivenessOnly))
LivenessAA, Opcodes, CheckBBLivenessOnly,
CheckPotentiallyDead))
return false;
return true;


@@ -11,8 +11,10 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/IR/Constants.h"
#include "llvm/Transforms/IPO/Attributor.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -27,9 +29,12 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO/ArgumentPromotion.h"
@@ -4837,23 +4842,117 @@ struct AAValueSimplifyCallSiteArgument : AAValueSimplifyFloating {
};
/// ----------------------- Heap-To-Stack Conversion ---------------------------
struct AAHeapToStackImpl : public AAHeapToStack {
AAHeapToStackImpl(const IRPosition &IRP, Attributor &A)
struct AAHeapToStackFunction final : public AAHeapToStack {
struct AllocationInfo {
/// The call that allocates the memory.
CallBase *CB;
/// The kind of allocation.
const enum class AllocationKind {
MALLOC,
CALLOC,
ALIGNED_ALLOC,
} Kind;
/// The library function id for the allocation.
LibFunc LibraryFunctionId;
/// The status wrt. a rewrite.
enum {
STACK_DUE_TO_USE,
STACK_DUE_TO_FREE,
INVALID,
} Status = STACK_DUE_TO_USE;
/// Flag to indicate if we encountered a use that might free this allocation
/// but which is not in the deallocation infos.
bool HasPotentiallyFreeingUnknownUses = false;
/// The set of free calls that use this allocation.
SmallPtrSet<CallBase *, 1> PotentialFreeCalls;
};
struct DeallocationInfo {
/// The call that deallocates the memory.
CallBase *CB;
/// Flag to indicate if we don't know all objects this deallocation might
/// free.
bool MightFreeUnknownObjects = false;
/// The set of allocation calls that are potentially freed.
SmallPtrSet<CallBase *, 1> PotentialAllocationCalls;
};
AAHeapToStackFunction(const IRPosition &IRP, Attributor &A)
: AAHeapToStack(IRP, A) {}
void initialize(Attributor &A) override {
AAHeapToStack::initialize(A);
const Function *F = getAnchorScope();
const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
auto AllocationIdentifierCB = [&](Instruction &I) {
CallBase *CB = dyn_cast<CallBase>(&I);
if (!CB)
return true;
if (isFreeCall(CB, TLI)) {
DeallocationInfos[CB] = new (A.Allocator) DeallocationInfo{CB};
return true;
}
bool IsMalloc = isMallocLikeFn(CB, TLI);
bool IsAlignedAllocLike = !IsMalloc && isAlignedAllocLikeFn(CB, TLI);
bool IsCalloc =
!IsMalloc && !IsAlignedAllocLike && isCallocLikeFn(CB, TLI);
if (!IsMalloc && !IsAlignedAllocLike && !IsCalloc)
return true;
auto Kind =
IsMalloc ? AllocationInfo::AllocationKind::MALLOC
: (IsCalloc ? AllocationInfo::AllocationKind::CALLOC
: AllocationInfo::AllocationKind::ALIGNED_ALLOC);
AllocationInfo *AI = new (A.Allocator) AllocationInfo{CB, Kind};
AllocationInfos[CB] = AI;
TLI->getLibFunc(*CB, AI->LibraryFunctionId);
return true;
};
bool Success = A.checkForAllCallLikeInstructions(
AllocationIdentifierCB, *this, /* CheckBBLivenessOnly */ false,
/* CheckPotentiallyDead */ true);
(void)Success;
assert(Success && "Did not expect the call base visit callback to fail!");
}
const std::string getAsStr() const override {
return "[H2S] Mallocs Good/Bad: " + std::to_string(MallocCalls.size()) +
"/" + std::to_string(BadMallocCalls.size());
unsigned NumH2SMallocs = 0, NumInvalidMallocs = 0;
for (const auto &It : AllocationInfos) {
if (It.second->Status == AllocationInfo::INVALID)
++NumInvalidMallocs;
else
++NumH2SMallocs;
}
return "[H2S] Mallocs Good/Bad: " + std::to_string(NumH2SMallocs) + "/" +
std::to_string(NumInvalidMallocs);
}
/// See AbstractAttribute::trackStatistics().
void trackStatistics() const override {
STATS_DECL(
MallocCalls, Function,
"Number of malloc/calloc/aligned_alloc calls converted to allocas");
for (auto &It : AllocationInfos)
if (It.second->Status != AllocationInfo::INVALID)
++BUILD_STAT_NAME(MallocCalls, Function);
}
bool isAssumedHeapToStack(CallBase &CB) const override {
return isValidState() && MallocCalls.contains(&CB) &&
!BadMallocCalls.count(&CB);
}
bool isKnownHeapToStack(CallBase &CB) const override {
return isValidState() && MallocCalls.contains(&CB) &&
!BadMallocCalls.count(&CB);
if (isValidState())
if (AllocationInfo *AI = AllocationInfos.lookup(&CB))
return AI->Status != AllocationInfo::INVALID;
return false;
}
ChangeStatus manifest(Attributor &A) override {
@@ -4864,76 +4963,82 @@ struct AAHeapToStackImpl : public AAHeapToStack {
Function *F = getAnchorScope();
const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
for (Instruction *MallocCall : MallocCalls) {
// This malloc cannot be replaced.
if (BadMallocCalls.count(MallocCall))
for (auto &It : AllocationInfos) {
AllocationInfo &AI = *It.second;
if (AI.Status == AllocationInfo::INVALID)
continue;
for (Instruction *FreeCall : FreesForMalloc[MallocCall]) {
for (CallBase *FreeCall : AI.PotentialFreeCalls) {
LLVM_DEBUG(dbgs() << "H2S: Removing free call: " << *FreeCall << "\n");
A.deleteAfterManifest(*FreeCall);
HasChanged = ChangeStatus::CHANGED;
}
LLVM_DEBUG(dbgs() << "H2S: Removing malloc call: " << *MallocCall
LLVM_DEBUG(dbgs() << "H2S: Removing malloc-like call: " << *AI.CB
<< "\n");
auto Remark = [&](OptimizationRemark OR) {
LibFunc IsAllocShared;
if (auto *CB = dyn_cast<CallBase>(MallocCall)) {
TLI->getLibFunc(*CB, IsAllocShared);
if (TLI->getLibFunc(*AI.CB, IsAllocShared))
if (IsAllocShared == LibFunc___kmpc_alloc_shared)
return OR << "Moving globalized variable to the stack.";
}
return OR << "Moving memory allocation from the heap to the stack.";
};
A.emitRemark<OptimizationRemark>(MallocCall, "HeapToStack", Remark);
A.emitRemark<OptimizationRemark>(AI.CB, "HeapToStack", Remark);
Align Alignment;
Value *Size;
if (isCallocLikeFn(MallocCall, TLI)) {
auto *Num = MallocCall->getOperand(0);
auto *SizeT = MallocCall->getOperand(1);
IRBuilder<> B(MallocCall);
Optional<APInt> SizeAPI = getSize(A, *this, AI);
if (SizeAPI.hasValue()) {
Size = ConstantInt::get(AI.CB->getContext(), *SizeAPI);
} else if (AI.Kind == AllocationInfo::AllocationKind::CALLOC) {
auto *Num = AI.CB->getOperand(0);
auto *SizeT = AI.CB->getOperand(1);
IRBuilder<> B(AI.CB);
Size = B.CreateMul(Num, SizeT, "h2s.calloc.size");
} else if (isAlignedAllocLikeFn(MallocCall, TLI)) {
Size = MallocCall->getOperand(1);
Alignment = MaybeAlign(cast<ConstantInt>(MallocCall->getOperand(0))
->getValue()
.getZExtValue())
.valueOrOne();
} else if (AI.Kind == AllocationInfo::AllocationKind::ALIGNED_ALLOC) {
Size = AI.CB->getOperand(1);
} else {
Size = MallocCall->getOperand(0);
Size = AI.CB->getOperand(0);
}
unsigned AS = cast<PointerType>(MallocCall->getType())->getAddressSpace();
Instruction *AI =
Align Alignment(1);
if (AI.Kind == AllocationInfo::AllocationKind::ALIGNED_ALLOC) {
Optional<APInt> AlignmentAPI =
getAPInt(A, *this, *AI.CB->getArgOperand(0));
assert(AlignmentAPI.hasValue() &&
"Expected an alignment during manifest!");
Alignment =
max(Alignment, MaybeAlign(AlignmentAPI.getValue().getZExtValue()));
}
unsigned AS = cast<PointerType>(AI.CB->getType())->getAddressSpace();
Instruction *Alloca =
new AllocaInst(Type::getInt8Ty(F->getContext()), AS, Size, Alignment,
"", MallocCall->getNextNode());
"", AI.CB->getNextNode());
if (AI->getType() != MallocCall->getType())
AI = new BitCastInst(AI, MallocCall->getType(), "malloc_bc",
AI->getNextNode());
if (Alloca->getType() != AI.CB->getType())
Alloca = new BitCastInst(Alloca, AI.CB->getType(), "malloc_bc",
Alloca->getNextNode());
A.changeValueAfterManifest(*MallocCall, *AI);
A.changeValueAfterManifest(*AI.CB, *Alloca);
if (auto *II = dyn_cast<InvokeInst>(MallocCall)) {
if (auto *II = dyn_cast<InvokeInst>(AI.CB)) {
auto *NBB = II->getNormalDest();
BranchInst::Create(NBB, MallocCall->getParent());
A.deleteAfterManifest(*MallocCall);
BranchInst::Create(NBB, AI.CB->getParent());
A.deleteAfterManifest(*AI.CB);
} else {
A.deleteAfterManifest(*MallocCall);
A.deleteAfterManifest(*AI.CB);
}
// Zero out the allocated memory if it was a calloc.
if (isCallocLikeFn(MallocCall, TLI)) {
auto *BI = new BitCastInst(AI, MallocCall->getType(), "calloc_bc",
AI->getNextNode());
if (AI.Kind == AllocationInfo::AllocationKind::CALLOC) {
auto *BI = new BitCastInst(Alloca, AI.CB->getType(), "calloc_bc",
Alloca->getNextNode());
Value *Ops[] = {
BI, ConstantInt::get(F->getContext(), APInt(8, 0, false)), Size,
ConstantInt::get(Type::getInt1Ty(F->getContext()), false)};
Type *Tys[] = {BI->getType(), MallocCall->getOperand(0)->getType()};
Type *Tys[] = {BI->getType(), AI.CB->getOperand(0)->getType()};
Module *M = F->getParent();
Function *Fn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys);
CallInst::Create(Fn, Ops, "", BI->getNextNode());
@@ -4944,21 +5049,58 @@ struct AAHeapToStackImpl : public AAHeapToStack {
return HasChanged;
}
/// Collection of all malloc calls in a function.
SmallSetVector<Instruction *, 4> MallocCalls;
Optional<APInt> getAPInt(Attributor &A, const AbstractAttribute &AA,
Value &V) {
bool UsedAssumedInformation = false;
Optional<Constant *> SimpleV =
A.getAssumedConstant(V, AA, UsedAssumedInformation);
if (!SimpleV.hasValue())
return APInt(64, 0);
if (auto *CI = dyn_cast_or_null<ConstantInt>(SimpleV.getValue()))
return CI->getValue();
return llvm::None;
}
/// Collection of malloc calls that cannot be converted.
DenseSet<const Instruction *> BadMallocCalls;
Optional<APInt> getSize(Attributor &A, const AbstractAttribute &AA,
AllocationInfo &AI) {
/// A map for each malloc call to the set of associated free calls.
DenseMap<Instruction *, SmallPtrSet<Instruction *, 4>> FreesForMalloc;
if (AI.Kind == AllocationInfo::AllocationKind::MALLOC)
return getAPInt(A, AA, *AI.CB->getArgOperand(0));
if (AI.Kind == AllocationInfo::AllocationKind::ALIGNED_ALLOC)
// We only return a size if the alignment is also constant.
return getAPInt(A, AA, *AI.CB->getArgOperand(0)).hasValue()
? getAPInt(A, AA, *AI.CB->getArgOperand(1))
: llvm::None;
assert(AI.Kind == AllocationInfo::AllocationKind::CALLOC &&
"Expected only callocs are left");
Optional<APInt> Num = getAPInt(A, AA, *AI.CB->getArgOperand(0));
Optional<APInt> Size = getAPInt(A, AA, *AI.CB->getArgOperand(1));
if (!Num.hasValue() || !Size.hasValue())
return llvm::None;
bool Overflow = false;
Size = Size.getValue().umul_ov(Num.getValue(), Overflow);
return Overflow ? llvm::None : Size;
}
/// Collection of all malloc-like calls in a function with associated
/// information.
DenseMap<CallBase *, AllocationInfo *> AllocationInfos;
/// Collection of all free-like calls in a function with associated
/// information.
DenseMap<CallBase *, DeallocationInfo *> DeallocationInfos;
ChangeStatus updateImpl(Attributor &A) override;
};
ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
const Function *F = getAnchorScope();
const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
const auto &LivenessAA =
A.getAAFor<AAIsDead>(*this, IRPosition::function(*F), DepClassTy::NONE);
MustBeExecutedContextExplorer &Explorer =
A.getInfoCache().getMustBeExecutedContextExplorer();
@@ -4966,7 +5108,66 @@ ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
bool StackIsAccessibleByOtherThreads =
A.getInfoCache().stackIsAccessibleByOtherThreads();
auto FreeCheck = [&](Instruction &I) {
// Flag to ensure we update our deallocation information at most once per
// updateImpl call and only if we use the free check reasoning.
bool HasUpdatedFrees = false;
auto UpdateFrees = [&]() {
HasUpdatedFrees = true;
for (auto &It : DeallocationInfos) {
DeallocationInfo &DI = *It.second;
// For now we cannot use deallocations that have unknown inputs, skip
// them.
if (DI.MightFreeUnknownObjects)
continue;
// No need to analyze dead calls, ignore them instead.
if (A.isAssumedDead(*DI.CB, this, &LivenessAA,
/* CheckBBLivenessOnly */ true))
continue;
// Use the optimistic version to get the freed objects, ignoring dead
// branches etc.
SmallVector<Value *, 8> Objects;
if (!getAssumedUnderlyingObjects(A, *DI.CB->getArgOperand(0), Objects,
*this, DI.CB)) {
LLVM_DEBUG(
dbgs()
<< "[H2S] Unexpected failure in getAssumedUnderlyingObjects!\n");
DI.MightFreeUnknownObjects = true;
continue;
}
// Check each object explicitly.
for (auto *Obj : Objects) {
// Free of null and undef can be ignored as no-ops (or UB in the latter
// case).
if (isa<ConstantPointerNull>(Obj) || isa<UndefValue>(Obj))
continue;
CallBase *ObjCB = dyn_cast<CallBase>(Obj);
if (!ObjCB) {
LLVM_DEBUG(dbgs()
<< "[H2S] Free of a non-call object: " << *Obj << "\n");
DI.MightFreeUnknownObjects = true;
continue;
}
AllocationInfo *AI = AllocationInfos.lookup(ObjCB);
if (!AI) {
LLVM_DEBUG(dbgs() << "[H2S] Free of a non-allocation object: " << *Obj
<< "\n");
DI.MightFreeUnknownObjects = true;
continue;
}
DI.PotentialAllocationCalls.insert(ObjCB);
}
}
};
auto FreeCheck = [&](AllocationInfo &AI) {
// If the stack is not accessible by other threads, the "must-free" logic
// doesn't apply as the pointer could be shared and needs to be placed in
// "shareable" memory.
@@ -4980,19 +5181,55 @@ ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
return false;
}
}
const auto &Frees = FreesForMalloc.lookup(&I);
if (Frees.size() != 1) {
if (!HasUpdatedFrees)
UpdateFrees();
// TODO: Allow multi exit functions that have different free calls.
if (AI.PotentialFreeCalls.size() != 1) {
LLVM_DEBUG(dbgs() << "[H2S] did not find one free call but "
<< Frees.size() << "\n");
<< AI.PotentialFreeCalls.size() << "\n");
return false;
}
Instruction *UniqueFree = *Frees.begin();
return Explorer.findInContextOf(UniqueFree, I.getNextNode());
CallBase *UniqueFree = *AI.PotentialFreeCalls.begin();
DeallocationInfo *DI = DeallocationInfos.lookup(UniqueFree);
if (!DI) {
LLVM_DEBUG(
dbgs() << "[H2S] unique free call was not known as deallocation call "
<< *UniqueFree << "\n");
return false;
}
if (DI->MightFreeUnknownObjects) {
LLVM_DEBUG(
dbgs() << "[H2S] unique free call might free unknown allocations\n");
return false;
}
if (DI->PotentialAllocationCalls.size() > 1) {
LLVM_DEBUG(dbgs() << "[H2S] unique free call might free "
<< DI->PotentialAllocationCalls.size()
<< " different allocations\n");
return false;
}
if (*DI->PotentialAllocationCalls.begin() != AI.CB) {
LLVM_DEBUG(
dbgs()
<< "[H2S] unique free call not known to free this allocation but "
<< **DI->PotentialAllocationCalls.begin() << "\n");
return false;
}
Instruction *CtxI = isa<InvokeInst>(AI.CB) ? AI.CB : AI.CB->getNextNode();
if (!Explorer.findInContextOf(UniqueFree, CtxI)) {
LLVM_DEBUG(
dbgs()
<< "[H2S] unique free call might not be executed with the allocation "
<< *UniqueFree << "\n");
return false;
}
return true;
};
auto UsesCheck = [&](Instruction &I) {
auto UsesCheck = [&](AllocationInfo &AI) {
bool ValidUsesOnly = true;
bool MustUse = true;
auto Pred = [&](const Use &U, bool &Follow) -> bool {
Instruction *UserI = cast<Instruction>(U.getUser());
if (isa<LoadInst>(UserI))
@@ -5010,15 +5247,8 @@ ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
if (auto *CB = dyn_cast<CallBase>(UserI)) {
if (!CB->isArgOperand(&U) || CB->isLifetimeStartOrEnd())
return true;
// Record malloc.
if (isFreeCall(UserI, TLI)) {
if (MustUse) {
FreesForMalloc[&I].insert(UserI);
} else {
LLVM_DEBUG(dbgs() << "[H2S] free potentially on different mallocs: "
<< *UserI << "\n");
ValidUsesOnly = false;
}
if (DeallocationInfos.count(CB)) {
AI.PotentialFreeCalls.insert(CB);
return true;
}
@@ -5033,8 +5263,12 @@ ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
*this, IRPosition::callsite_argument(*CB, ArgNo),
DepClassTy::OPTIONAL);
if (!NoCaptureAA.isAssumedNoCapture() ||
!ArgNoFreeAA.isAssumedNoFree()) {
bool MaybeCaptured = !NoCaptureAA.isAssumedNoCapture();
bool MaybeFreed = !ArgNoFreeAA.isAssumedNoFree();
if (MaybeCaptured ||
(AI.LibraryFunctionId != LibFunc___kmpc_alloc_shared &&
MaybeFreed)) {
AI.HasPotentiallyFreeingUnknownUses |= MaybeFreed;
// Emit a missed remark if this is missed OpenMP globalization.
auto Remark = [&](OptimizationRemarkMissed ORM) {
@@ -5045,13 +5279,9 @@ ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
<< "Mark as noescape to override.";
};
LibFunc IsAllocShared;
if (auto *AllocShared = dyn_cast<CallBase>(&I)) {
TLI->getLibFunc(*AllocShared, IsAllocShared);
if (IsAllocShared == LibFunc___kmpc_alloc_shared)
A.emitRemark<OptimizationRemarkMissed>(
AllocShared, "HeapToStackFailed", Remark);
}
if (AI.LibraryFunctionId == LibFunc___kmpc_alloc_shared)
A.emitRemark<OptimizationRemarkMissed>(AI.CB, "HeapToStackFailed",
Remark);
LLVM_DEBUG(dbgs() << "[H2S] Bad user: " << *UserI << "\n");
ValidUsesOnly = false;
@@ -5061,7 +5291,6 @@ ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
if (isa<GetElementPtrInst>(UserI) || isa<BitCastInst>(UserI) ||
isa<PHINode>(UserI) || isa<SelectInst>(UserI)) {
MustUse &= !(isa<PHINode>(UserI) || isa<SelectInst>(UserI));
Follow = true;
return true;
}
@@ -5071,95 +5300,63 @@ ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
ValidUsesOnly = false;
return true;
};
A.checkForAllUses(Pred, *this, I);
A.checkForAllUses(Pred, *this, *AI.CB);
return ValidUsesOnly;
};
auto MallocCallocCheck = [&](Instruction &I) {
if (BadMallocCalls.count(&I))
return true;
// The actual update starts here. We look at all allocations and depending on
// their status perform the appropriate check(s).
for (auto &It : AllocationInfos) {
AllocationInfo &AI = *It.second;
if (AI.Status == AllocationInfo::INVALID)
continue;
bool IsMalloc = isMallocLikeFn(&I, TLI);
bool IsAlignedAllocLike = isAlignedAllocLikeFn(&I, TLI);
bool IsCalloc = !IsMalloc && isCallocLikeFn(&I, TLI);
if (!IsMalloc && !IsAlignedAllocLike && !IsCalloc) {
BadMallocCalls.insert(&I);
return true;
if (MaxHeapToStackSize == -1) {
if (AI.Kind == AllocationInfo::AllocationKind::ALIGNED_ALLOC)
if (!getAPInt(A, *this, *AI.CB->getArgOperand(0)).hasValue()) {
LLVM_DEBUG(dbgs() << "[H2S] Unknown allocation alignment: " << *AI.CB
<< "\n");
AI.Status = AllocationInfo::INVALID;
Changed = ChangeStatus::CHANGED;
continue;
}
} else {
Optional<APInt> Size = getSize(A, *this, AI);
if (!Size.hasValue() || Size.getValue().ugt(MaxHeapToStackSize)) {
LLVM_DEBUG({
if (!Size.hasValue())
dbgs() << "[H2S] Unknown allocation size (or alignment): " << *AI.CB
<< "\n";
else
dbgs() << "[H2S] Allocation size too large: " << *AI.CB << " vs. "
<< MaxHeapToStackSize << "\n";
});
AI.Status = AllocationInfo::INVALID;
Changed = ChangeStatus::CHANGED;
continue;
}
}
if (IsMalloc) {
if (MaxHeapToStackSize == -1) {
if (UsesCheck(I) || FreeCheck(I)) {
MallocCalls.insert(&I);
return true;
}
}
if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(0)))
if (Size->getValue().ule(MaxHeapToStackSize))
if (UsesCheck(I) || FreeCheck(I)) {
MallocCalls.insert(&I);
return true;
}
} else if (IsAlignedAllocLike && isa<ConstantInt>(I.getOperand(0))) {
if (MaxHeapToStackSize == -1) {
if (UsesCheck(I) || FreeCheck(I)) {
MallocCalls.insert(&I);
return true;
}
}
// Only if the alignment and sizes are constant.
if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(1)))
if (Size->getValue().ule(MaxHeapToStackSize))
if (UsesCheck(I) || FreeCheck(I)) {
MallocCalls.insert(&I);
return true;
}
} else if (IsCalloc) {
if (MaxHeapToStackSize == -1) {
if (UsesCheck(I) || FreeCheck(I)) {
MallocCalls.insert(&I);
return true;
}
}
bool Overflow = false;
if (auto *Num = dyn_cast<ConstantInt>(I.getOperand(0)))
if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(1)))
if ((Size->getValue().umul_ov(Num->getValue(), Overflow))
.ule(MaxHeapToStackSize))
if (!Overflow && (UsesCheck(I) || FreeCheck(I))) {
MallocCalls.insert(&I);
return true;
}
}
BadMallocCalls.insert(&I);
return true;
};
size_t NumBadMallocs = BadMallocCalls.size();
A.checkForAllCallLikeInstructions(MallocCallocCheck, *this);
if (NumBadMallocs != BadMallocCalls.size())
return ChangeStatus::CHANGED;
return ChangeStatus::UNCHANGED;
}
struct AAHeapToStackFunction final : public AAHeapToStackImpl {
AAHeapToStackFunction(const IRPosition &IRP, Attributor &A)
: AAHeapToStackImpl(IRP, A) {}
/// See AbstractAttribute::trackStatistics().
void trackStatistics() const override {
STATS_DECL(
MallocCalls, Function,
"Number of malloc/calloc/aligned_alloc calls converted to allocas");
for (auto *C : MallocCalls)
if (!BadMallocCalls.count(C))
++BUILD_STAT_NAME(MallocCalls, Function);
switch (AI.Status) {
case AllocationInfo::STACK_DUE_TO_USE:
if (UsesCheck(AI))
continue;
AI.Status = AllocationInfo::STACK_DUE_TO_FREE;
LLVM_FALLTHROUGH;
case AllocationInfo::STACK_DUE_TO_FREE:
if (FreeCheck(AI))
continue;
AI.Status = AllocationInfo::INVALID;
Changed = ChangeStatus::CHANGED;
continue;
case AllocationInfo::INVALID:
llvm_unreachable("Invalid allocations should never reach this point!");
};
}
};
return Changed;
}
/// ----------------------- Privatizable Pointers ------------------------------
struct AAPrivatizablePtrImpl : public AAPrivatizablePtr {


@@ -2472,7 +2472,7 @@ struct AAHeapToSharedFunction : public AAHeapToShared {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
for (CallBase *CB : MallocCalls) {
// Skip replacing this if HeapToStack has already claimed it.
if (HS && HS->isKnownHeapToStack(*CB))
if (HS && HS->isAssumedHeapToStack(*CB))
continue;
// Find the unique free call to remove it.


@@ -123,7 +123,7 @@ define i32* @checkAndAdvance(i32* align 16 %0) {
; GRAPH-NEXT: updates [AAMemoryLocation] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument
; GRAPH-NEXT: updates [AAMemoryLocation] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument
; GRAPH-EMPTY:
; GRAPH-NEXT: [AAHeapToStack] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state [H2S] Mallocs Good/Bad: 0/1
; GRAPH-NEXT: [AAHeapToStack] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state [H2S] Mallocs Good/Bad: 0/0
; GRAPH-EMPTY:
; GRAPH-NEXT: [AAValueSimplify] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state not-simple
; GRAPH-EMPTY:


@@ -55,21 +55,21 @@ define void @h2s_value_simplify_interaction(i1 %c, i8* %A) {
; IS________NPM-LABEL: define {{[^@]+}}@h2s_value_simplify_interaction
; IS________NPM-SAME: (i1 [[C:%.*]], i8* nocapture nofree [[A:%.*]]) {
; IS________NPM-NEXT: entry:
; IS________NPM-NEXT: [[M:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
; IS________NPM-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1
; IS________NPM-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
; IS________NPM: t:
; IS________NPM-NEXT: br i1 false, label [[DEAD:%.*]], label [[F2:%.*]]
; IS________NPM: f:
; IS________NPM-NEXT: br label [[J:%.*]]
; IS________NPM: f2:
; IS________NPM-NEXT: [[L:%.*]] = load i8, i8* [[M]], align 1
; IS________NPM-NEXT: [[L:%.*]] = load i8, i8* [[TMP0]], align 1
; IS________NPM-NEXT: call void @usei8(i8 [[L]])
; IS________NPM-NEXT: call void @no_sync_func(i8* nocapture nofree noundef [[M]]) #[[ATTR6:[0-9]+]]
; IS________NPM-NEXT: call void @no_sync_func(i8* nocapture nofree noundef [[TMP0]]) #[[ATTR6:[0-9]+]]
; IS________NPM-NEXT: br label [[J]]
; IS________NPM: dead:
; IS________NPM-NEXT: unreachable
; IS________NPM: j:
; IS________NPM-NEXT: [[PHI:%.*]] = phi i8* [ [[M]], [[F]] ], [ null, [[F2]] ]
; IS________NPM-NEXT: [[PHI:%.*]] = phi i8* [ [[TMP0]], [[F]] ], [ null, [[F2]] ]
; IS________NPM-NEXT: tail call void @no_sync_func(i8* nocapture nofree noundef [[PHI]]) #[[ATTR6]]
; IS________NPM-NEXT: ret void
;


@@ -13,8 +13,8 @@ target triple = "nvptx64"
define void @kernel() {
; CHECK-LABEL: define {{[^@]+}}@kernel() {
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @foo() #[[ATTR0:[0-9]+]]
; CHECK-NEXT: call void @bar() #[[ATTR0]]
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: ret void
;
entry:
@@ -25,7 +25,7 @@ entry:
define internal void @foo() {
; CHECK-LABEL: define {{[^@]+}}@foo
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1
; CHECK-NEXT: ret void
@@ -41,7 +41,7 @@ define internal void @bar() {
; CHECK-LABEL: define {{[^@]+}}@bar
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 noundef 4) #[[ATTR0]]
; CHECK-NEXT: [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 noundef 4) #[[ATTR0]], !dbg [[DBG6:![0-9]+]]
; CHECK-NEXT: call void @share(i8* nofree writeonly [[TMP0]]) #[[ATTR2:[0-9]+]]
; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[TMP0]]) #[[ATTR0]]
; CHECK-NEXT: ret void