
[Analysis, Transforms] Fix some Clang-tidy modernize and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 312383
Author: Eugene Zelenko
Date:   2017-09-01 21:37:29 +00:00
commit cbd8f32d28 (parent fe8f0ed2ee)
9 changed files with 536 additions and 294 deletions
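The Clang-tidy "modernize" changes below are mechanical and repeat across all nine files. A condensed before/after sketch of the three patterns involved (the cache class here is hypothetical; each individual rewrite appears verbatim in the hunks that follow):

#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Value.h"

// Before: C++03-style typedef, constructor-initialized member, and
// empty special member functions.
class OldStyleCache {
  typedef llvm::DenseMap<llvm::Value *, int> MapTy;
  bool Scanned;

public:
  OldStyleCache() : Scanned(false) {}
  ~OldStyleCache() {}
};

// After: modernize-use-using, modernize-use-default-member-init, and
// modernize-use-equals-default -- the same rewrites applied throughout
// this commit.
class NewStyleCache {
  using MapTy = llvm::DenseMap<llvm::Value *, int>;
  bool Scanned = false;

public:
  NewStyleCache() = default;
  ~NewStyleCache() = default;
};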

include/llvm/Analysis/AssumptionCache.h

@@ -1,4 +1,4 @@
//===- llvm/Analysis/AssumptionCache.h - Track @llvm.assume ---*- C++ -*-===//
//===- llvm/Analysis/AssumptionCache.h - Track @llvm.assume -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,9 +18,8 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
@@ -28,6 +27,11 @@
namespace llvm {
class CallInst;
class Function;
class raw_ostream;
class Value;
/// \brief A cache of @llvm.assume calls within a function.
///
/// This cache provides fast lookup of assumptions within a function by caching
@@ -47,6 +51,7 @@ class AssumptionCache {
class AffectedValueCallbackVH final : public CallbackVH {
AssumptionCache *AC;
void deleted() override;
void allUsesReplacedWith(Value *) override;
@@ -76,7 +81,7 @@ class AssumptionCache {
///
/// We want to be as lazy about this as possible, and so we scan the function
/// at the last moment.
bool Scanned;
bool Scanned = false;
/// \brief Scan the function for assumptions and add them to the cache.
void scanFunction();
@@ -84,7 +89,7 @@ class AssumptionCache {
public:
/// \brief Construct an AssumptionCache from a function by scanning all of
/// its instructions.
AssumptionCache(Function &F) : F(F), Scanned(false) {}
AssumptionCache(Function &F) : F(F) {}
/// This cache is designed to be self-updating and so it should never be
/// invalidated.
@@ -145,10 +150,11 @@ public:
/// assumption caches for a given function.
class AssumptionAnalysis : public AnalysisInfoMixin<AssumptionAnalysis> {
friend AnalysisInfoMixin<AssumptionAnalysis>;
static AnalysisKey Key;
public:
typedef AssumptionCache Result;
using Result = AssumptionCache;
AssumptionCache run(Function &F, FunctionAnalysisManager &) {
return AssumptionCache(F);
@@ -161,6 +167,7 @@ class AssumptionPrinterPass : public PassInfoMixin<AssumptionPrinterPass> {
public:
explicit AssumptionPrinterPass(raw_ostream &OS) : OS(OS) {}
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
@@ -177,10 +184,11 @@ class AssumptionCacheTracker : public ImmutablePass {
/// delete our cache of intrinsics for a function when it is deleted.
class FunctionCallbackVH final : public CallbackVH {
AssumptionCacheTracker *ACT;
void deleted() override;
public:
typedef DenseMapInfo<Value *> DMI;
using DMI = DenseMapInfo<Value *>;
FunctionCallbackVH(Value *V, AssumptionCacheTracker *ACT = nullptr)
: CallbackVH(V), ACT(ACT) {}
@@ -188,8 +196,10 @@ class AssumptionCacheTracker : public ImmutablePass {
friend FunctionCallbackVH;
typedef DenseMap<FunctionCallbackVH, std::unique_ptr<AssumptionCache>,
FunctionCallbackVH::DMI> FunctionCallsMap;
using FunctionCallsMap =
DenseMap<FunctionCallbackVH, std::unique_ptr<AssumptionCache>,
FunctionCallbackVH::DMI>;
FunctionCallsMap AssumptionCaches;
public:
@@ -208,6 +218,7 @@ public:
}
void verifyAnalysis() const override;
bool doFinalization(Module &) override {
verifyAnalysis();
return false;
@@ -218,4 +229,4 @@ public:
} // end namespace llvm
#endif
#endif // LLVM_ANALYSIS_ASSUMPTIONCACHE_H
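The Include What You Use side of the change is easiest to see in this first header: heavyweight IR includes are dropped in favor of the ADT headers the file actually instantiates, plus forward declarations for types touched only through pointers or references. In condensed form (comments are my reading of why each include survives):

// Before: full class definitions pulled in even though the header never
// needs them.
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"

// After: include only what the header uses directly...
#include "llvm/ADT/DenseMapInfo.h" // DenseMapInfo<Value *> is instantiated
#include "llvm/ADT/SmallVector.h"  // SmallVector is used as a member type

// ...and forward-declare everything used only via pointer or reference.
namespace llvm {

class CallInst;
class Function;
class raw_ostream;
class Value;

} // end namespace llvm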

include/llvm/Analysis/ValueTracking.h

@@ -15,32 +15,32 @@
#ifndef LLVM_ANALYSIS_VALUETRACKING_H
#define LLVM_ANALYSIS_VALUETRACKING_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Optional.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/IR/Intrinsics.h"
#include <cassert>
#include <cstdint>
namespace llvm {
template <typename T> class ArrayRef;
class APInt;
class AddOperator;
class AssumptionCache;
class DataLayout;
class DominatorTree;
class GEPOperator;
class Instruction;
struct KnownBits;
class Loop;
class LoopInfo;
class OptimizationRemarkEmitter;
class MDNode;
class StringRef;
class TargetLibraryInfo;
class Value;
namespace Intrinsic {
enum ID : unsigned;
}
class AddOperator;
class APInt;
class AssumptionCache;
class DataLayout;
class DominatorTree;
class GEPOperator;
class IntrinsicInst;
struct KnownBits;
class Loop;
class LoopInfo;
class MDNode;
class OptimizationRemarkEmitter;
class StringRef;
class TargetLibraryInfo;
class Value;
/// Determine which bits of V are known to be either zero or one and return
/// them in the KnownZero/KnownOne bit sets.
@@ -56,17 +56,20 @@ template <typename T> class ArrayRef;
const Instruction *CxtI = nullptr,
const DominatorTree *DT = nullptr,
OptimizationRemarkEmitter *ORE = nullptr);
/// Returns the known bits rather than passing by reference.
KnownBits computeKnownBits(const Value *V, const DataLayout &DL,
unsigned Depth = 0, AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
const DominatorTree *DT = nullptr,
OptimizationRemarkEmitter *ORE = nullptr);
/// Compute known bits from the range metadata.
/// \p KnownZero the set of bits that are known to be zero
/// \p KnownOne the set of bits that are known to be one
void computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
KnownBits &Known);
/// Return true if LHS and RHS have no common bits set.
bool haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
const DataLayout &DL,
@@ -180,7 +183,6 @@ template <typename T> class ArrayRef;
/// -0 --> true
/// x > +0 --> true
/// x < -0 --> false
///
bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI);
/// Return true if we can prove that the specified FP value's sign bit is 0.
@@ -190,7 +192,6 @@ template <typename T> class ArrayRef;
/// -0 --> false
/// x > +0 --> true
/// x < -0 --> false
///
bool SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI);
/// If the specified value can be set by repeating the same byte in memory,
@@ -231,8 +232,10 @@ template <typename T> class ArrayRef;
/// ConstantDataArray pointer. nullptr indicates a zeroinitializer (a valid
/// initializer, it just doesn't fit the ConstantDataArray interface).
const ConstantDataArray *Array;
/// Slice starts at this Offset.
uint64_t Offset;
/// Length of the slice.
uint64_t Length;
@@ -242,6 +245,7 @@ template <typename T> class ArrayRef;
Offset += Delta;
Length -= Delta;
}
/// Convenience accessor for elements in the slice.
uint64_t operator[](unsigned I) const {
return Array==nullptr ? 0 : Array->getElementAsInteger(I + Offset);
@@ -378,6 +382,7 @@ template <typename T> class ArrayRef;
const DominatorTree *DT = nullptr);
enum class OverflowResult { AlwaysOverflows, MayOverflow, NeverOverflows };
OverflowResult computeOverflowForUnsignedMul(const Value *LHS,
const Value *RHS,
const DataLayout &DL,
@@ -466,6 +471,7 @@ template <typename T> class ArrayRef;
SPF_ABS, /// Absolute value
SPF_NABS /// Negated absolute value
};
/// \brief Behavior when a floating point min/max is given one NaN and one
/// non-NaN as input.
enum SelectPatternNaNBehavior {
@@ -476,6 +482,7 @@ template <typename T> class ArrayRef;
/// it has been determined that no operands can
/// be NaN).
};
struct SelectPatternResult {
SelectPatternFlavor Flavor;
SelectPatternNaNBehavior NaNBehavior; /// Only applicable if Flavor is
@@ -489,6 +496,7 @@ template <typename T> class ArrayRef;
return !(SPF == SPF_UNKNOWN || SPF == SPF_ABS || SPF == SPF_NABS);
}
};
/// Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind
/// and providing the out parameter results if we successfully match.
///
@@ -532,4 +540,4 @@ template <typename T> class ArrayRef;
unsigned Depth = 0);
} // end namespace llvm
#endif
#endif // LLVM_ANALYSIS_VALUETRACKING_H

include/llvm/Transforms/Scalar/SROA.h

@@ -17,15 +17,23 @@
#define LLVM_TRANSFORMS_SCALAR_SROA_H
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/Compiler.h"
#include <vector>
namespace llvm {
class AllocaInst;
class AssumptionCache;
class DominatorTree;
class Function;
class Instruction;
class LLVMContext;
class PHINode;
class SelectInst;
class Use;
/// A private "module" namespace for types and utilities used by SROA. These
/// are implementation details and should not be used by clients.
namespace sroa LLVM_LIBRARY_VISIBILITY {

include/llvm/Transforms/Vectorize/SLPVectorizer.h

@@ -1,4 +1,4 @@
//===---- SLPVectorizer.h ---------------------------------------*- C++ -*-===//
//===- SLPVectorizer.h ------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -19,30 +19,48 @@
#ifndef LLVM_TRANSFORMS_VECTORIZE_SLPVECTORIZER_H
#define LLVM_TRANSFORMS_VECTORIZE_SLPVECTORIZER_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/ValueHandle.h"
namespace llvm {
class AssumptionCache;
class BasicBlock;
class CmpInst;
class DataLayout;
class DemandedBits;
class DominatorTree;
class Function;
class InsertElementInst;
class InsertValueInst;
class Instruction;
class LoopInfo;
class OptimizationRemarkEmitter;
class PHINode;
class ScalarEvolution;
class StoreInst;
class TargetLibraryInfo;
class TargetTransformInfo;
class Value;
/// A private "module" namespace for types and utilities used by this pass.
/// These are implementation details and should not be used by clients.
namespace slpvectorizer {
class BoUpSLP;
}
} // end namespace slpvectorizer
struct SLPVectorizerPass : public PassInfoMixin<SLPVectorizerPass> {
typedef SmallVector<StoreInst *, 8> StoreList;
typedef MapVector<Value *, StoreList> StoreListMap;
typedef SmallVector<WeakTrackingVH, 8> WeakTrackingVHList;
typedef MapVector<Value *, WeakTrackingVHList> WeakTrackingVHListMap;
using StoreList = SmallVector<StoreInst *, 8>;
using StoreListMap = MapVector<Value *, StoreList>;
using WeakTrackingVHList = SmallVector<WeakTrackingVH, 8>;
using WeakTrackingVHListMap = MapVector<Value *, WeakTrackingVHList>;
ScalarEvolution *SE = nullptr;
TargetTransformInfo *TTI = nullptr;
@@ -103,11 +121,14 @@ private:
/// Try to vectorize trees that start at insertvalue instructions.
bool vectorizeInsertValueInst(InsertValueInst *IVI, BasicBlock *BB,
slpvectorizer::BoUpSLP &R);
/// Try to vectorize trees that start at insertelement instructions.
bool vectorizeInsertElementInst(InsertElementInst *IEI, BasicBlock *BB,
slpvectorizer::BoUpSLP &R);
/// Try to vectorize trees that start at compare instructions.
bool vectorizeCmpInst(CmpInst *CI, BasicBlock *BB, slpvectorizer::BoUpSLP &R);
/// Tries to vectorize constructs started from CmpInst, InsertValueInst or
/// InsertElementInst instructions.
bool vectorizeSimpleInstructions(SmallVectorImpl<WeakVH> &Instructions,
@@ -128,6 +149,7 @@ private:
/// The getelementptr instructions in a basic block organized by base pointer.
WeakTrackingVHListMap GEPs;
};
}
} // end namespace llvm
#endif // LLVM_TRANSFORMS_VECTORIZE_SLPVECTORIZER_H

lib/Analysis/AssumptionCache.cpp

@@ -13,14 +13,26 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Dominators.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <utility>
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -255,8 +267,9 @@ AssumptionCacheTracker::AssumptionCacheTracker() : ImmutablePass(ID) {
initializeAssumptionCacheTrackerPass(*PassRegistry::getPassRegistry());
}
AssumptionCacheTracker::~AssumptionCacheTracker() {}
AssumptionCacheTracker::~AssumptionCacheTracker() = default;
char AssumptionCacheTracker::ID = 0;
INITIALIZE_PASS(AssumptionCacheTracker, "assumption-cache-tracker",
"Assumption Cache Tracker", false, true)
char AssumptionCacheTracker::ID = 0;

lib/Analysis/ValueTracking.cpp

@@ -13,37 +13,66 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/Support/Debug.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <array>
#include <cstring>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -70,6 +99,7 @@ static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
}
namespace {
// Simplifying using an assume can only be done in a particular control-flow
// context (the context instruction provides that context). If an assume and
// the context instruction are not in the same block then the DT helps in
@@ -79,6 +109,7 @@ struct Query {
AssumptionCache *AC;
const Instruction *CxtI;
const DominatorTree *DT;
// Unlike the other analyses, this may be a nullptr because not all clients
// provide it currently.
OptimizationRemarkEmitter *ORE;
@@ -92,11 +123,12 @@ struct Query {
/// isKnownNonZero, which calls computeKnownBits and isKnownToBeAPowerOfTwo
/// (all of which can call computeKnownBits), and so on.
std::array<const Value *, MaxDepth> Excluded;
unsigned NumExcluded;
unsigned NumExcluded = 0;
Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT, OptimizationRemarkEmitter *ORE = nullptr)
: DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), NumExcluded(0) {}
: DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE) {}
Query(const Query &Q, const Value *NewExcl)
: DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), ORE(Q.ORE),
@@ -113,6 +145,7 @@ struct Query {
return std::find(Excluded.begin(), End, Value) != End;
}
};
} // end anonymous namespace
// Given the provided Value and, potentially, a context instruction, return
@@ -171,7 +204,6 @@ bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
return (LHSKnown.Zero | RHSKnown.Zero).isAllOnesValue();
}
bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI) {
for (const User *U : CxtI->users()) {
if (const ICmpInst *IC = dyn_cast<ICmpInst>(U))
@@ -380,7 +412,9 @@ static bool isEphemeralValueOf(const Instruction *I, const Value *E) {
continue;
// If all uses of this value are ephemeral, then so is this value.
if (all_of(V->users(), [&](const User *U) { return EphValues.count(U); })) {
if (llvm::all_of(V->users(), [&](const User *U) {
return EphValues.count(U);
})) {
if (V == E)
return true;
@@ -423,7 +457,6 @@ static bool isAssumeLikeIntrinsic(const Instruction *I) {
bool llvm::isValidAssumeForContext(const Instruction *Inv,
const Instruction *CxtI,
const DominatorTree *DT) {
// There are two restrictions on the use of an assume:
// 1. The assume must dominate the context (or the control flow must
// reach the assume whenever it reaches the context).
@@ -891,7 +924,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
}
break;
}
case Instruction::Or: {
case Instruction::Or:
computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);
computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
@@ -900,7 +933,6 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
// Output known-1 are known to be set if set in either the LHS | RHS.
Known.One |= Known2.One;
break;
}
case Instruction::Xor: {
computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);
computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
@@ -1911,7 +1943,7 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
}
}
// Check if all incoming values are non-zero constant.
bool AllNonZeroConstants = all_of(PN->operands(), [](Value *V) {
bool AllNonZeroConstants = llvm::all_of(PN->operands(), [](Value *V) {
return isa<ConstantInt>(V) && !cast<ConstantInt>(V)->isZero();
});
if (AllNonZeroConstants)
@@ -2494,7 +2526,6 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(ImmutableCallSite ICS,
///
/// NOTE: this function will need to be revisited when we support non-default
/// rounding modes!
///
bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI,
unsigned Depth) {
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
@@ -2723,7 +2754,6 @@ Value *llvm::isBytewiseValue(Value *V) {
return nullptr;
}
// This is the recursive version of BuildSubAggregate. It takes a few different
// arguments. Idxs is the index within the nested struct From that we are
// looking at now (which is of type IndexedType). IdxSkip is the number of
@@ -2734,7 +2764,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType,
SmallVectorImpl<unsigned> &Idxs,
unsigned IdxSkip,
Instruction *InsertBefore) {
llvm::StructType *STy = dyn_cast<llvm::StructType>(IndexedType);
StructType *STy = dyn_cast<StructType>(IndexedType);
if (STy) {
// Save the original To argument so we can modify it
Value *OrigTo = To;
@@ -2773,8 +2803,8 @@ static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType,
return nullptr;
// Insert the value in the new (sub) aggregrate
return llvm::InsertValueInst::Create(To, V, makeArrayRef(Idxs).slice(IdxSkip),
"tmp", InsertBefore);
return InsertValueInst::Create(To, V, makeArrayRef(Idxs).slice(IdxSkip),
"tmp", InsertBefore);
}
// This helper takes a nested struct and extracts a part of it (which is again a
@@ -3745,7 +3775,7 @@ bool llvm::isOverflowIntrinsicNoWrap(const IntrinsicInst *II,
return true;
};
return any_of(GuardingBranches, AllUsesGuardedByBranch);
return llvm::any_of(GuardingBranches, AllUsesGuardedByBranch);
}
@@ -3949,7 +3979,7 @@ bool llvm::programUndefinedIfFullPoison(const Instruction *PoisonI) {
}
break;
};
}
return false;
}

lib/Transforms/Scalar/RewriteStatepointsForGC.cpp

@@ -12,37 +12,67 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <set>
#include <string>
#include <utility>
#include <vector>
#define DEBUG_TYPE "rewrite-statepoints-for-gc"
@@ -53,6 +83,7 @@ static cl::opt<bool> PrintLiveSet("spp-print-liveset", cl::Hidden,
cl::init(false));
static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size", cl::Hidden,
cl::init(false));
// Print out the base pointers for debugging
static cl::opt<bool> PrintBasePointers("spp-print-base-pointers", cl::Hidden,
cl::init(false));
@@ -68,6 +99,7 @@ static bool ClobberNonLive = true;
#else
static bool ClobberNonLive = false;
#endif
static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live",
cl::location(ClobberNonLive),
cl::Hidden);
@@ -77,13 +109,16 @@ static cl::opt<bool>
cl::Hidden, cl::init(true));
namespace {
struct RewriteStatepointsForGC : public ModulePass {
static char ID; // Pass identification, replacement for typeid
RewriteStatepointsForGC() : ModulePass(ID) {
initializeRewriteStatepointsForGCPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F);
bool runOnModule(Module &M) override {
bool Changed = false;
for (Function &F : M)
@@ -121,12 +156,14 @@ struct RewriteStatepointsForGC : public ModulePass {
// Helpers for stripNonValidAttributesAndMetadata
void stripNonValidAttributesAndMetadataFromBody(Function &F);
void stripNonValidAttributesFromPrototype(Function &F);
// Certain metadata on instructions are invalid after running RS4GC.
// Optimizations that run after RS4GC can incorrectly use this metadata to
// optimize functions. We drop such metadata on the instruction.
void stripInvalidMetadataFromInstruction(Instruction &I);
};
} // namespace
} // end anonymous namespace
char RewriteStatepointsForGC::ID = 0;
@@ -142,9 +179,11 @@ INITIALIZE_PASS_END(RewriteStatepointsForGC, "rewrite-statepoints-for-gc",
"Make relocations explicit at statepoints", false, false)
namespace {
struct GCPtrLivenessData {
/// Values defined in this block.
MapVector<BasicBlock *, SetVector<Value *>> KillSet;
/// Values used in this block (and thus live); does not included values
/// killed within this block.
MapVector<BasicBlock *, SetVector<Value *>> LiveSet;
@@ -168,10 +207,10 @@ struct GCPtrLivenessData {
// Generally, after the execution of a full findBasePointer call, only the
// base relation will remain. Internally, we add a mixture of the two
// types, then update all the second type to the first type
typedef MapVector<Value *, Value *> DefiningValueMapTy;
typedef SetVector<Value *> StatepointLiveSetTy;
typedef MapVector<AssertingVH<Instruction>, AssertingVH<Value>>
RematerializedValueMapTy;
using DefiningValueMapTy = MapVector<Value *, Value *>;
using StatepointLiveSetTy = SetVector<Value *>;
using RematerializedValueMapTy =
MapVector<AssertingVH<Instruction>, AssertingVH<Value>>;
struct PartiallyConstructedSafepointRecord {
/// The set of values known to be live across this safepoint
@@ -193,7 +232,8 @@ struct PartiallyConstructedSafepointRecord {
/// Maps rematerialized copy to it's original value.
RematerializedValueMapTy RematerializedValues;
};
}
} // end anonymous namespace
static ArrayRef<Use> GetDeoptBundleOperands(ImmutableCallSite CS) {
Optional<OperandBundleUse> DeoptBundle =
@@ -256,7 +296,7 @@ static bool containsGCPtrType(Type *Ty) {
if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
return containsGCPtrType(AT->getElementType());
if (StructType *ST = dyn_cast<StructType>(Ty))
return any_of(ST->subtypes(), containsGCPtrType);
return llvm::any_of(ST->subtypes(), containsGCPtrType);
return false;
}
@@ -301,7 +341,9 @@ analyzeParsePointLiveness(DominatorTree &DT,
}
static bool isKnownBaseResult(Value *V);
namespace {
/// A single base defining value - An immediate base defining value for an
/// instruction 'Def' is an input to 'Def' whose base is also a base of 'Def'.
/// For instructions which have multiple pointer [vector] inputs or that
@@ -313,9 +355,11 @@ namespace {
struct BaseDefiningValueResult {
/// Contains the value which is the base defining value.
Value * const BDV;
/// True if the base defining value is also known to be an actual base
/// pointer.
const bool IsKnownBase;
BaseDefiningValueResult(Value *BDV, bool IsKnownBase)
: BDV(BDV), IsKnownBase(IsKnownBase) {
#ifndef NDEBUG
@@ -326,7 +370,8 @@ struct BaseDefiningValueResult {
#endif
}
};
}
} // end anonymous namespace
static BaseDefiningValueResult findBaseDefiningValue(Value *I);
@@ -431,7 +476,6 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
if (isa<LoadInst>(I))
// The value loaded is an gc base itself
return BaseDefiningValueResult(I, true);
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
// The base of this GEP is the base
@@ -444,12 +488,11 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
break;
case Intrinsic::experimental_gc_statepoint:
llvm_unreachable("statepoints don't produce pointers");
case Intrinsic::experimental_gc_relocate: {
case Intrinsic::experimental_gc_relocate:
// Rerunning safepoint insertion after safepoints are already
// inserted is not supported. It could probably be made to work,
// but why are you doing this? There's no good reason.
llvm_unreachable("repeat safepoint insertion is not supported");
}
case Intrinsic::gcroot:
// Currently, this mechanism hasn't been extended to work with gcroot.
// There's no reason it couldn't be, but I haven't thought about the
@@ -553,6 +596,7 @@ static bool isKnownBaseResult(Value *V) {
}
namespace {
/// Models the state of a single base defining value in the findBasePointer
/// algorithm for determining where a new instruction is needed to propagate
/// the base of this BDV.
@@ -560,7 +604,7 @@ class BDVState {
public:
enum Status { Unknown, Base, Conflict };
BDVState() : Status(Unknown), BaseValue(nullptr) {}
BDVState() : BaseValue(nullptr) {}
explicit BDVState(Status Status, Value *BaseValue = nullptr)
: Status(Status), BaseValue(BaseValue) {
@@ -599,16 +643,17 @@ public:
case Conflict:
OS << "C";
break;
};
}
OS << " (" << getBaseValue() << " - "
<< (getBaseValue() ? getBaseValue()->getName() : "nullptr") << "): ";
}
private:
Status Status;
Status Status = Unknown;
AssertingVH<Value> BaseValue; // Non-null only if Status == Base.
};
}
} // end anonymous namespace
#ifndef NDEBUG
static raw_ostream &operator<<(raw_ostream &OS, const BDVState &State) {
@@ -1171,7 +1216,7 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
return;
auto FindIndex = [](ArrayRef<Value *> LiveVec, Value *Val) {
auto ValIt = find(LiveVec, Val);
auto ValIt = llvm::find(LiveVec, Val);
assert(ValIt != LiveVec.end() && "Val not found in LiveVec!");
size_t Index = std::distance(LiveVec.begin(), ValIt);
assert(Index < LiveVec.size() && "Bug in std::find?");
@@ -1231,7 +1276,7 @@ class DeferredReplacement {
AssertingVH<Instruction> New;
bool IsDeoptimize = false;
DeferredReplacement() {}
DeferredReplacement() = default;
public:
static DeferredReplacement createRAUW(Instruction *Old, Instruction *New) {
@@ -1288,7 +1333,8 @@ public:
OldI->eraseFromParent();
}
};
}
} // end anonymous namespace
static StringRef getDeoptLowering(CallSite CS) {
const char *DeoptLowering = "deopt-lowering";
@@ -1306,7 +1352,6 @@ static StringRef getDeoptLowering(CallSite CS) {
return "live-through";
}
static void
makeStatepointExplicitImpl(const CallSite CS, /* to replace */
const SmallVectorImpl<Value *> &BasePtrs,
@@ -1530,7 +1575,6 @@ static void
insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
DenseMap<Value *, Value *> &AllocaMap,
DenseSet<Value *> &VisitedLiveValues) {
for (User *U : GCRelocs) {
GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U);
if (!Relocate)
@@ -1566,7 +1610,6 @@ static void insertRematerializationStores(
const RematerializedValueMapTy &RematerializedValues,
DenseMap<Value *, Value *> &AllocaMap,
DenseSet<Value *> &VisitedLiveValues) {
for (auto RematerializedValuePair: RematerializedValues) {
Instruction *RematerializedValue = RematerializedValuePair.first;
Value *OriginalValue = RematerializedValuePair.second;
@@ -1832,7 +1875,6 @@ static void findLiveReferences(
static Value* findRematerializableChainToBasePointer(
SmallVectorImpl<Instruction*> &ChainToBase,
Value *CurrentValue) {
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurrentValue)) {
ChainToBase.push_back(GEP);
return findRematerializableChainToBasePointer(ChainToBase,
@@ -1888,7 +1930,6 @@ chainToBasePointerCost(SmallVectorImpl<Instruction*> &Chain,
}
static bool AreEquivalentPhiNodes(PHINode &OrigRootPhi, PHINode &AlternateRootPhi) {
unsigned PhiNum = OrigRootPhi.getNumIncomingValues();
if (PhiNum != AlternateRootPhi.getNumIncomingValues() ||
OrigRootPhi.getParent() != AlternateRootPhi.getParent())
@@ -1912,7 +1953,6 @@ static bool AreEquivalentPhiNodes(PHINode &OrigRootPhi, PHINode &AlternateRootPh
return false;
}
return true;
}
// From the statepoint live set pick values that are cheaper to recompute then
@@ -2313,7 +2353,6 @@ RewriteStatepointsForGC::stripNonValidAttributesFromPrototype(Function &F) {
}
void RewriteStatepointsForGC::stripInvalidMetadataFromInstruction(Instruction &I) {
if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
return;
// These are the attributes that are still valid on loads and stores after
@@ -2339,7 +2378,6 @@ void RewriteStatepointsForGC::stripInvalidMetadataFromInstruction(Instruction &I
// Drops all metadata on the instruction other than ValidMetadataAfterRS4GC.
I.dropUnknownNonDebugMetadata(ValidMetadataAfterRS4GC);
}
void RewriteStatepointsForGC::stripNonValidAttributesAndMetadataFromBody(Function &F) {
@@ -2349,7 +2387,6 @@ void RewriteStatepointsForGC::stripNonValidAttributesAndMetadataFromBody(Functio
LLVMContext &Ctx = F.getContext();
MDBuilder Builder(Ctx);
for (Instruction &I : instructions(F)) {
if (const MDNode *MD = I.getMetadata(LLVMContext::MD_tbaa)) {
assert(MD->getNumOperands() < 5 && "unrecognized metadata shape!");
@@ -2398,7 +2435,7 @@ static bool shouldRewriteStatepointsIn(Function &F) {
void RewriteStatepointsForGC::stripNonValidAttributesAndMetadata(Module &M) {
#ifndef NDEBUG
assert(any_of(M, shouldRewriteStatepointsIn) && "precondition!");
assert(llvm::any_of(M, shouldRewriteStatepointsIn) && "precondition!");
#endif
for (Function &F : M)
@@ -2666,7 +2703,6 @@ static void computeLiveInValues(DominatorTree &DT, Function &F,
static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
StatepointLiveSetTy &Out) {
BasicBlock *BB = Inst->getParent();
// Note: The copy is intentional and required

lib/Transforms/Scalar/SROA.cpp

@@ -24,28 +24,53 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/SROA.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/PtrUseVisitor.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantFolder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Chrono.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
@@ -55,6 +80,17 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
#ifndef NDEBUG
// We only use this for a debug check.
@@ -88,10 +124,12 @@ static cl::opt<bool> SROAStrictInbounds("sroa-strict-inbounds", cl::init(false),
cl::Hidden);
namespace {
/// \brief A custom IRBuilder inserter which prefixes all names, but only in
/// Assert builds.
class IRBuilderPrefixedInserter : public IRBuilderDefaultInserter {
std::string Prefix;
const Twine getNameWithPrefix(const Twine &Name) const {
return Name.isTriviallyEmpty() ? Name : Prefix + Name;
}
@@ -107,11 +145,9 @@ protected:
}
};
/// \brief Provide a typedef for IRBuilder that drops names in release builds.
using IRBuilderTy = llvm::IRBuilder<ConstantFolder, IRBuilderPrefixedInserter>;
}
/// \brief Provide a type for IRBuilder that drops names in release builds.
using IRBuilderTy = IRBuilder<ConstantFolder, IRBuilderPrefixedInserter>;
namespace {
/// \brief A used slice of an alloca.
///
/// This structure represents a slice of an alloca used by some instruction. It
@@ -120,17 +156,18 @@ namespace {
/// or not when forming partitions of the alloca.
class Slice {
/// \brief The beginning offset of the range.
uint64_t BeginOffset;
uint64_t BeginOffset = 0;
/// \brief The ending offset, not included in the range.
uint64_t EndOffset;
uint64_t EndOffset = 0;
/// \brief Storage for both the use of this slice and whether it can be
/// split.
PointerIntPair<Use *, 1, bool> UseAndIsSplittable;
public:
Slice() : BeginOffset(), EndOffset() {}
Slice() = default;
Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable)
: BeginOffset(BeginOffset), EndOffset(EndOffset),
UseAndIsSplittable(U, IsSplittable) {}
@@ -180,12 +217,15 @@ public:
}
bool operator!=(const Slice &RHS) const { return !operator==(RHS); }
};
} // end anonymous namespace
namespace llvm {
template <typename T> struct isPodLike;
template <> struct isPodLike<Slice> { static const bool value = true; };
}
} // end namespace llvm
/// \brief Representation of the alloca slices.
///
@@ -207,13 +247,15 @@ public:
/// \brief Support for iterating over the slices.
/// @{
typedef SmallVectorImpl<Slice>::iterator iterator;
typedef iterator_range<iterator> range;
using iterator = SmallVectorImpl<Slice>::iterator;
using range = iterator_range<iterator>;
iterator begin() { return Slices.begin(); }
iterator end() { return Slices.end(); }
typedef SmallVectorImpl<Slice>::const_iterator const_iterator;
typedef iterator_range<const_iterator> const_range;
using const_iterator = SmallVectorImpl<Slice>::const_iterator;
using const_range = iterator_range<const_iterator>;
const_iterator begin() const { return Slices.begin(); }
const_iterator end() const { return Slices.end(); }
/// @}
@@ -264,6 +306,7 @@ public:
private:
template <typename DerivedT, typename RetT = void> class BuilderBase;
class SliceBuilder;
friend class AllocaSlices::SliceBuilder;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -320,7 +363,7 @@ private:
friend class AllocaSlices;
friend class AllocaSlices::partition_iterator;
typedef AllocaSlices::iterator iterator;
using iterator = AllocaSlices::iterator;
/// \brief The beginning and ending offsets of the alloca for this
/// partition.
@@ -403,12 +446,12 @@ class AllocaSlices::partition_iterator
/// \brief We also need to keep track of the maximum split end offset seen.
/// FIXME: Do we really?
uint64_t MaxSplitSliceEndOffset;
uint64_t MaxSplitSliceEndOffset = 0;
/// \brief Sets the partition to be empty at given iterator, and sets the
/// end iterator.
partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)
: P(SI), SE(SE), MaxSplitSliceEndOffset(0) {
: P(SI), SE(SE) {
// If not already at the end, advance our state to form the initial
// partition.
if (SI != SE)
@@ -432,19 +475,21 @@ class AllocaSlices::partition_iterator
// Remove the uses which have ended in the prior partition. This
// cannot change the max split slice end because we just checked that
// the prior partition ended prior to that max.
P.SplitTails.erase(
remove_if(P.SplitTails,
[&](Slice *S) { return S->endOffset() <= P.EndOffset; }),
P.SplitTails.end());
assert(any_of(P.SplitTails,
[&](Slice *S) {
return S->endOffset() == MaxSplitSliceEndOffset;
}) &&
P.SplitTails.erase(llvm::remove_if(P.SplitTails,
[&](Slice *S) {
return S->endOffset() <=
P.EndOffset;
}),
P.SplitTails.end());
assert(llvm::any_of(P.SplitTails,
[&](Slice *S) {
return S->endOffset() == MaxSplitSliceEndOffset;
}) &&
"Could not find the current max split slice offset!");
assert(all_of(P.SplitTails,
[&](Slice *S) {
return S->endOffset() <= MaxSplitSliceEndOffset;
}) &&
assert(llvm::all_of(P.SplitTails,
[&](Slice *S) {
return S->endOffset() <= MaxSplitSliceEndOffset;
}) &&
"Max split slice end offset is not actually the max!");
}
}
@@ -608,7 +653,8 @@ static Value *foldPHINodeOrSelectInst(Instruction &I) {
class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
friend class PtrUseVisitor<SliceBuilder>;
friend class InstVisitor<SliceBuilder>;
typedef PtrUseVisitor<SliceBuilder> Base;
using Base = PtrUseVisitor<SliceBuilder>;
const uint64_t AllocSize;
AllocaSlices &AS;
@@ -996,8 +1042,9 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
return;
}
Slices.erase(remove_if(Slices, [](const Slice &S) { return S.isDead(); }),
Slices.end());
Slices.erase(
llvm::remove_if(Slices, [](const Slice &S) { return S.isDead(); }),
Slices.end());
#ifndef NDEBUG
if (SROARandomShuffleSlices) {
@@ -1820,11 +1867,12 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
// do that until all the backends are known to produce good code for all
// integer vector types.
if (!HaveCommonEltTy) {
CandidateTys.erase(remove_if(CandidateTys,
[](VectorType *VTy) {
return !VTy->getElementType()->isIntegerTy();
}),
CandidateTys.end());
CandidateTys.erase(
llvm::remove_if(CandidateTys,
[](VectorType *VTy) {
return !VTy->getElementType()->isIntegerTy();
}),
CandidateTys.end());
// If there were no integer vector types, give up.
if (CandidateTys.empty())
@@ -2151,8 +2199,9 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
class llvm::sroa::AllocaSliceRewriter
: public InstVisitor<AllocaSliceRewriter, bool> {
// Befriend the base class so it can delegate to private visit methods.
friend class llvm::InstVisitor<AllocaSliceRewriter, bool>;
typedef llvm::InstVisitor<AllocaSliceRewriter, bool> Base;
friend class InstVisitor<AllocaSliceRewriter, bool>;
using Base = InstVisitor<AllocaSliceRewriter, bool>;
const DataLayout &DL;
AllocaSlices &AS;
@@ -2182,16 +2231,18 @@ class llvm::sroa::AllocaSliceRewriter
// The original offset of the slice currently being rewritten relative to
// the original alloca.
uint64_t BeginOffset, EndOffset;
uint64_t BeginOffset = 0;
uint64_t EndOffset = 0;
// The new offsets of the slice currently being rewritten relative to the
// original alloca.
uint64_t NewBeginOffset, NewEndOffset;
uint64_t SliceSize;
bool IsSplittable;
bool IsSplit;
Use *OldUse;
Instruction *OldPtr;
bool IsSplittable = false;
bool IsSplit = false;
Use *OldUse = nullptr;
Instruction *OldPtr = nullptr;
// Track post-rewrite users which are PHI nodes and Selects.
SmallSetVector<PHINode *, 8> &PHIUsers;
@@ -2221,8 +2272,7 @@ public:
VecTy(PromotableVecTy),
ElementTy(VecTy ? VecTy->getElementType() : nullptr),
ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy) / 8 : 0),
BeginOffset(), EndOffset(), IsSplittable(), IsSplit(), OldUse(),
OldPtr(), PHIUsers(PHIUsers), SelectUsers(SelectUsers),
PHIUsers(PHIUsers), SelectUsers(SelectUsers),
IRB(NewAI.getContext(), ConstantFolder()) {
if (VecTy) {
assert((DL.getTypeSizeInBits(ElementTy) % 8) == 0 &&
@@ -2987,6 +3037,7 @@ private:
};
namespace {
/// \brief Visitor to rewrite aggregate loads and stores as scalar.
///
/// This pass aggressively rewrites all aggregate loads and stores on
@@ -2994,7 +3045,7 @@ namespace {
/// with scalar loads and stores.
class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
// Befriend the base class so it can delegate to private visit methods.
friend class llvm::InstVisitor<AggLoadStoreRewriter, bool>;
friend class InstVisitor<AggLoadStoreRewriter, bool>;
/// Queue of pointer uses to analyze and potentially rewrite.
SmallVector<Use *, 8> Queue;
@@ -3037,12 +3088,15 @@ private:
protected:
/// The builder used to form new instructions.
IRBuilderTy IRB;
/// The indices which to be used with insert- or extractvalue to select the
/// appropriate value within the aggregate.
SmallVector<unsigned, 4> Indices;
/// The indices to a GEP instruction which will move Ptr to the correct slot
/// within the aggregate.
SmallVector<Value *, 4> GEPIndices;
/// The base pointer of the original op, used as a base for GEPing the
/// split operations.
Value *Ptr;
@@ -3193,7 +3247,8 @@ private:
return false;
}
};
}
} // end anonymous namespace
/// \brief Strip aggregate type wrapping.
///
@@ -3485,58 +3540,60 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
// match relative to their starting offset. We have to verify this prior to
// any rewriting.
Stores.erase(
remove_if(Stores,
[&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
// Lookup the load we are storing in our map of split
// offsets.
auto *LI = cast<LoadInst>(SI->getValueOperand());
// If it was completely unsplittable, then we're done,
// and this store can't be pre-split.
if (UnsplittableLoads.count(LI))
return true;
llvm::remove_if(Stores,
[&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
// Lookup the load we are storing in our map of split
// offsets.
auto *LI = cast<LoadInst>(SI->getValueOperand());
// If it was completely unsplittable, then we're done,
// and this store can't be pre-split.
if (UnsplittableLoads.count(LI))
return true;
auto LoadOffsetsI = SplitOffsetsMap.find(LI);
if (LoadOffsetsI == SplitOffsetsMap.end())
return false; // Unrelated loads are definitely safe.
auto &LoadOffsets = LoadOffsetsI->second;
auto LoadOffsetsI = SplitOffsetsMap.find(LI);
if (LoadOffsetsI == SplitOffsetsMap.end())
return false; // Unrelated loads are definitely safe.
auto &LoadOffsets = LoadOffsetsI->second;
// Now lookup the store's offsets.
auto &StoreOffsets = SplitOffsetsMap[SI];
// Now lookup the store's offsets.
auto &StoreOffsets = SplitOffsetsMap[SI];
// If the relative offsets of each split in the load and
// store match exactly, then we can split them and we
// don't need to remove them here.
if (LoadOffsets.Splits == StoreOffsets.Splits)
return false;
// If the relative offsets of each split in the load and
// store match exactly, then we can split them and we
// don't need to remove them here.
if (LoadOffsets.Splits == StoreOffsets.Splits)
return false;
DEBUG(dbgs() << " Mismatched splits for load and store:\n"
<< " " << *LI << "\n"
<< " " << *SI << "\n");
DEBUG(dbgs()
<< " Mismatched splits for load and store:\n"
<< " " << *LI << "\n"
<< " " << *SI << "\n");
// We've found a store and load that we need to split
// with mismatched relative splits. Just give up on them
// and remove both instructions from our list of
// candidates.
UnsplittableLoads.insert(LI);
return true;
}),
// We've found a store and load that we need to split
// with mismatched relative splits. Just give up on them
// and remove both instructions from our list of
// candidates.
UnsplittableLoads.insert(LI);
return true;
}),
Stores.end());
// Now we have to go *back* through all the stores, because a later store may
// have caused an earlier store's load to become unsplittable and if it is
// unsplittable for the later store, then we can't rely on it being split in
// the earlier store either.
Stores.erase(remove_if(Stores,
[&UnsplittableLoads](StoreInst *SI) {
auto *LI = cast<LoadInst>(SI->getValueOperand());
return UnsplittableLoads.count(LI);
}),
Stores.erase(llvm::remove_if(Stores,
[&UnsplittableLoads](StoreInst *SI) {
auto *LI =
cast<LoadInst>(SI->getValueOperand());
return UnsplittableLoads.count(LI);
}),
Stores.end());
// Once we've established all the loads that can't be split for some reason,
// filter any that made it into our list out.
Loads.erase(remove_if(Loads,
[&UnsplittableLoads](LoadInst *LI) {
return UnsplittableLoads.count(LI);
}),
Loads.erase(llvm::remove_if(Loads,
[&UnsplittableLoads](LoadInst *LI) {
return UnsplittableLoads.count(LI);
}),
Loads.end());
// If no loads or stores are left, there is no pre-splitting to be done for
@@ -3804,7 +3861,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
}
// Remove the killed slices that have ben pre-split.
AS.erase(remove_if(AS, [](const Slice &S) { return S.isDead(); }), AS.end());
AS.erase(llvm::remove_if(AS, [](const Slice &S) { return S.isDead(); }),
AS.end());
// Insert our new slices. This will sort and merge them into the sorted
// sequence.
@@ -3819,7 +3877,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
// Finally, don't try to promote any allocas that new require re-splitting.
// They have already been added to the worklist above.
PromotableAllocas.erase(
remove_if(
llvm::remove_if(
PromotableAllocas,
[&](AllocaInst *AI) { return ResplitPromotableAllocas.count(AI); }),
PromotableAllocas.end());
@@ -4256,7 +4314,7 @@ PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT,
auto IsInSet = [&](AllocaInst *AI) { return DeletedAllocas.count(AI); };
Worklist.remove_if(IsInSet);
PostPromotionWorklist.remove_if(IsInSet);
PromotableAllocas.erase(remove_if(PromotableAllocas, IsInSet),
PromotableAllocas.erase(llvm::remove_if(PromotableAllocas, IsInSet),
PromotableAllocas.end());
DeletedAllocas.clear();
}
@@ -4291,9 +4349,12 @@ class llvm::sroa::SROALegacyPass : public FunctionPass {
SROA Impl;
public:
static char ID;
SROALegacyPass() : FunctionPass(ID) {
initializeSROALegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
if (skipFunction(F))
return false;
@@ -4303,6 +4364,7 @@ public:
getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F));
return !PA.areAllPreserved();
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
@@ -4311,7 +4373,6 @@ public:
}
StringRef getPassName() const override { return "SROA"; }
static char ID;
};
char SROALegacyPass::ID = 0;

lib/Transforms/Vectorize/SLPVectorizer.cpp

@@ -15,38 +15,86 @@
// "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DOTGraphTraits.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Vectorize.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <memory>
#include <set>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -382,7 +430,6 @@ static bool matchExtractIndex(Instruction *E, unsigned Idx, unsigned Opcode) {
/// possible scalar operand in vectorized instruction.
static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
TargetLibraryInfo *TLI) {
unsigned Opcode = UserInst->getOpcode();
switch (Opcode) {
case Instruction::Load: {
@@ -427,24 +474,25 @@ static bool isSimple(Instruction *I) {
}
namespace llvm {
namespace slpvectorizer {
/// Bottom Up SLP Vectorizer.
class BoUpSLP {
public:
typedef SmallVector<Value *, 8> ValueList;
typedef SmallVector<Instruction *, 16> InstrList;
typedef SmallPtrSet<Value *, 16> ValueSet;
typedef SmallVector<StoreInst *, 8> StoreList;
typedef MapVector<Value *, SmallVector<Instruction *, 2>>
ExtraValueToDebugLocsMap;
using ValueList = SmallVector<Value *, 8>;
using InstrList = SmallVector<Instruction *, 16>;
using ValueSet = SmallPtrSet<Value *, 16>;
using StoreList = SmallVector<StoreInst *, 8>;
using ExtraValueToDebugLocsMap =
MapVector<Value *, SmallVector<Instruction *, 2>>;
BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti,
TargetLibraryInfo *TLi, AliasAnalysis *Aa, LoopInfo *Li,
DominatorTree *Dt, AssumptionCache *AC, DemandedBits *DB,
const DataLayout *DL, OptimizationRemarkEmitter *ORE)
: NumLoadsWantToKeepOrder(0), NumLoadsWantToChangeOrder(0), F(Func),
SE(Se), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt), AC(AC), DB(DB),
DL(DL), ORE(ORE), Builder(Se->getContext()) {
: F(Func), SE(Se), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt), AC(AC),
DB(DB), DL(DL), ORE(ORE), Builder(Se->getContext()) {
CodeMetrics::collectEphemeralValues(F, AC, EphValues);
// Use the vector register size specified by the target unless overridden
// by a command-line option.
@@ -466,6 +514,7 @@ public:
/// \brief Vectorize the tree that starts with the elements in \p VL.
/// Returns the vectorized root.
Value *vectorizeTree();
/// Vectorize the tree but with the list of externally used values \p
/// ExternallyUsedValues. Values in this MapVector can be replaced but the
/// generated extractvalue instructions.
@@ -483,6 +532,7 @@ public:
/// the purpose of scheduling and extraction in the \p UserIgnoreLst.
void buildTree(ArrayRef<Value *> Roots,
ArrayRef<Value *> UserIgnoreLst = None);
/// Construct a vectorizable tree that starts at \p Roots, ignoring users for
/// the purpose of scheduling and extraction in the \p UserIgnoreLst taking
/// into account (anf updating it, if required) list of externally used
@@ -599,15 +649,14 @@ private:
void reorderAltShuffleOperands(unsigned Opcode, ArrayRef<Value *> VL,
SmallVectorImpl<Value *> &Left,
SmallVectorImpl<Value *> &Right);
/// \reorder commutative operands to get better probability of
/// generating vectorized code.
void reorderInputsAccordingToOpcode(unsigned Opcode, ArrayRef<Value *> VL,
SmallVectorImpl<Value *> &Left,
SmallVectorImpl<Value *> &Right);
struct TreeEntry {
TreeEntry(std::vector<TreeEntry> &Container)
: Scalars(), VectorizedValue(nullptr), NeedToGather(0),
Container(Container) {}
TreeEntry(std::vector<TreeEntry> &Container) : Container(Container) {}
/// \returns true if the scalars in VL are equal to this entry.
bool isSame(ArrayRef<Value *> VL) const {
@@ -619,10 +668,10 @@ private:
ValueList Scalars;
/// The Scalars are vectorized into this value. It is initialized to Null.
Value *VectorizedValue;
Value *VectorizedValue = nullptr;
/// Do we need to gather this sequence ?
bool NeedToGather;
bool NeedToGather = false;
/// Points back to the VectorizableTree.
///
@@ -686,16 +735,19 @@ private:
/// This POD struct describes one external user in the vectorized tree.
struct ExternalUser {
ExternalUser (Value *S, llvm::User *U, int L) :
Scalar(S), User(U), Lane(L){}
ExternalUser(Value *S, llvm::User *U, int L)
: Scalar(S), User(U), Lane(L) {}
// Which scalar in our function.
Value *Scalar;
// Which user that uses the scalar.
llvm::User *User;
// Which lane does the scalar belong to.
int Lane;
};
typedef SmallVector<ExternalUser, 16> UserList;
using UserList = SmallVector<ExternalUser, 16>;
/// Checks if two instructions may access the same memory.
///
@@ -703,7 +755,6 @@ private:
/// is invariant in the calling loop.
bool isAliased(const MemoryLocation &Loc1, Instruction *Inst1,
Instruction *Inst2) {
// First check if the result is already in the cache.
AliasCacheKey key = std::make_pair(Inst1, Inst2);
Optional<bool> &result = AliasCache[key];
@@ -721,7 +772,7 @@ private:
return aliased;
}
using AliasCacheKey = std::pair<Instruction *, Instruction *>;
/// Cache for alias results.
/// TODO: consider moving this to the AliasAnalysis itself.
@ -754,6 +805,7 @@ private:
/// Holds all of the instructions that we gathered.
SetVector<Instruction *> GatherSeq;
/// A list of blocks that we are going to CSE.
SetVector<BasicBlock *> CSEBlocks;
@ -762,17 +814,11 @@ private:
/// instruction bundle (= a group of instructions which is combined into a
/// vector instruction).
struct ScheduleData {
// The initial value for the dependency counters. It means that the
// dependencies are not calculated yet.
enum { InvalidDeps = -1 };
ScheduleData() = default;
void init(int BlockSchedulingRegionID, Value *OpVal) {
FirstInBundle = this;
@ -842,19 +888,19 @@ private:
}
}
Instruction *Inst = nullptr;
/// Points to the head in an instruction bundle (and always to this for
/// single instructions).
ScheduleData *FirstInBundle = nullptr;
/// Single linked list of all instructions in a bundle. Null if it is a
/// single instruction.
ScheduleData *NextInBundle = nullptr;
/// Single linked list of all memory instructions (e.g. load, store, call)
/// in the block - until the end of the scheduling region.
ScheduleData *NextLoadStore = nullptr;
/// The dependent memory instructions.
/// This list is derived on demand in calculateDependencies().
@ -862,34 +908,33 @@ private:
/// This ScheduleData is in the current scheduling region if this matches
/// the current SchedulingRegionID of BlockScheduling.
int SchedulingRegionID = 0;
/// Used for getting a "good" final ordering of instructions.
int SchedulingPriority = 0;
/// The number of dependencies. Consists of the number of users of the
/// instruction plus the number of dependent memory instructions (if any).
/// This value is calculated on demand.
/// If InvalidDeps, the number of dependencies is not calculated yet.
///
int Dependencies = InvalidDeps;
/// The number of dependencies minus the number of dependencies of scheduled
/// instructions. As soon as this is zero, the instruction/bundle gets ready
/// for scheduling.
/// Note that this is negative as long as Dependencies is not calculated.
int UnscheduledDeps = InvalidDeps;
/// The sum of UnscheduledDeps in a bundle. Equals UnscheduledDeps for
/// single instructions.
int UnscheduledDepsInBundle = InvalidDeps;
/// True if this instruction is scheduled (or considered as scheduled in the
/// dry-run).
bool IsScheduled = false;
/// The value that carries the opcode of the current instruction in the
/// schedule data.
Value *OpValue = nullptr;
};
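// Editor's note -- the ScheduleData rewrite above is the standard
// modernize-use-default-member-init transformation. A minimal before/after
// sketch with hypothetical names:
struct SketchBefore {
  SketchBefore() : Inst(nullptr), Priority(0), Scheduled(false) {}
  Instruction *Inst;
  int Priority;
  bool Scheduled;
};
struct SketchAfter {
  SketchAfter() = default; // In-class initializers replace the init list.
  Instruction *Inst = nullptr;
  int Priority = 0;
  bool Scheduled = false;
};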
#ifndef NDEBUG
@ -903,18 +948,9 @@ private:
friend struct DOTGraphTraits<BoUpSLP *>;
/// Contains all scheduling data for a basic block.
///
struct BlockScheduling {
BlockScheduling(BasicBlock *BB)
: BB(BB), ChunkSize(BB->size()), ChunkPos(ChunkSize) {}
void clear() {
ReadyInsts.clear();
@ -1090,28 +1126,30 @@ private:
ReadyList ReadyInsts;
/// The first instruction of the scheduling region.
Instruction *ScheduleStart = nullptr;
/// The first instruction _after_ the scheduling region.
Instruction *ScheduleEnd = nullptr;
/// The first memory accessing instruction in the scheduling region
/// (can be null).
ScheduleData *FirstLoadStoreInRegion = nullptr;
/// The last memory accessing instruction in the scheduling region
/// (can be null).
ScheduleData *LastLoadStoreInRegion = nullptr;
/// The current size of the scheduling region.
int ScheduleRegionSize = 0;
/// The maximum size allowed for the scheduling region.
int ScheduleRegionSizeLimit = ScheduleRegionSizeBudget;
/// The ID of the scheduling region. For a new vectorization iteration this
/// is incremented which "removes" all ScheduleData from the region.
// Make sure that the initial SchedulingRegionID is greater than the
// initial SchedulingRegionID in ScheduleData (which is 0).
int SchedulingRegionID = 1;
};
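// Editor's note -- illustrative sketch (hypothetical names) of the
// generation-ID trick documented above: bumping the region ID "removes"
// every node from the region in O(1), because membership is defined as
// matching the current ID rather than by clearing per-node state.
struct RegionNodeSketch {
  int RegionID = 0; // Matches ScheduleData's initial value.
};
struct RegionSketch {
  int CurrentRegionID = 1; // Strictly greater than any fresh node's ID.
  bool contains(const RegionNodeSketch &N) const {
    return N.RegionID == CurrentRegionID;
  }
  void startNewRegion() { ++CurrentRegionID; } // Invalidates all nodes at once.
};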
/// Attaches the BlockScheduling structures to basic blocks.
@ -1125,10 +1163,10 @@ private:
ArrayRef<Value *> UserIgnoreList;
// Number of load bundles that contain consecutive loads.
int NumLoadsWantToKeepOrder = 0;
// Number of load bundles that contain consecutive loads in reversed order.
int NumLoadsWantToChangeOrder = 0;
// Analysis and block reference.
Function *F;
@ -1155,20 +1193,20 @@ private:
/// original width.
MapVector<Value *, std::pair<uint64_t, bool>> MinBWs;
};
} // end namespace slpvectorizer
template <> struct GraphTraits<BoUpSLP *> {
using TreeEntry = BoUpSLP::TreeEntry;
/// NodeRef has to be a pointer per the GraphWriter.
using NodeRef = TreeEntry *;
/// \brief Add the VectorizableTree to the index iterator to be able to return
/// TreeEntry pointers.
struct ChildIteratorType
: public iterator_adaptor_base<ChildIteratorType,
SmallVector<int, 1>::iterator> {
std::vector<TreeEntry> &VectorizableTree;
ChildIteratorType(SmallVector<int, 1>::iterator W,
@ -1183,17 +1221,19 @@ template <> struct GraphTraits<BoUpSLP *> {
static ChildIteratorType child_begin(NodeRef N) {
return {N->UserTreeIndices.begin(), N->Container};
}
static ChildIteratorType child_end(NodeRef N) {
return {N->UserTreeIndices.end(), N->Container};
}
/// For the node iterator we just need to turn the TreeEntry iterator into a
/// TreeEntry* iterator so that it dereferences to NodeRef.
using nodes_iterator = pointer_iterator<std::vector<TreeEntry>::iterator>;
static nodes_iterator nodes_begin(BoUpSLP *R) {
return nodes_iterator(R->VectorizableTree.begin());
}
static nodes_iterator nodes_end(BoUpSLP *R) {
return nodes_iterator(R->VectorizableTree.end());
}
@ -1202,7 +1242,7 @@ template <> struct GraphTraits<BoUpSLP *> {
};
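// Editor's note -- hedged usage sketch: a GraphTraits specialization like the
// one above is what LLVM's generic graph utilities key on. Assuming a live
// BoUpSLP instance R, the tree could be rendered through GraphWriter roughly
// like this (illustrative, not part of this patch):
//
//   #include "llvm/Support/GraphWriter.h"
//   ViewGraph(&R, "slp-tree"); // Uses the DOTGraphTraits defined below.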
template <> struct DOTGraphTraits<BoUpSLP *> : public DefaultDOTGraphTraits {
using TreeEntry = BoUpSLP::TreeEntry;
DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
@ -1239,6 +1279,7 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
ExtraValueToDebugLocsMap ExternallyUsedValues;
buildTree(Roots, ExternallyUsedValues, UserIgnoreLst);
}
void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
ExtraValueToDebugLocsMap &ExternallyUsedValues,
ArrayRef<Value *> UserIgnoreLst) {
@ -1627,7 +1668,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
case Instruction::AShr:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
newTreeEntry(VL, true, UserTreeIdx);
DEBUG(dbgs() << "SLP: added a vector of bin op.\n");
@ -1650,7 +1691,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
buildTree_rec(Operands, Depth + 1, UserTreeIdx);
}
return;
case Instruction::GetElementPtr: {
// We don't combine GEPs with complicated (nested) indexing.
for (unsigned j = 0; j < VL.size(); ++j) {
@ -1784,7 +1825,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
return;
}
case Instruction::ShuffleVector:
// If this is not an alternate sequence of opcodes like add-sub
// then do not vectorize this instruction.
if (!isAltShuffle) {
@ -1814,7 +1855,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
buildTree_rec(Operands, Depth + 1, UserTreeIdx);
}
return;
default:
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx);
@ -1942,11 +1983,11 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
assert(Opcode && allSameType(VL) && allSameBlock(VL) && "Invalid VL");
Instruction *VL0 = cast<Instruction>(VL[0]);
switch (Opcode) {
case Instruction::PHI:
return 0;
case Instruction::ExtractValue:
case Instruction::ExtractElement:
if (canReuseExtract(VL, VL0)) {
int DeadCost = 0;
for (unsigned i = 0, e = VL.size(); i < e; ++i) {
@ -1962,7 +2003,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
return -DeadCost;
}
return getGatherCost(VecTy);
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPToUI:
@ -2173,7 +2214,6 @@ bool BoUpSLP::isFullyVectorizableTinyTree() {
}
bool BoUpSLP::isTreeTinyAndNotFullyVectorizable() {
// We can vectorize the tree if its size is greater than or equal to the
// minimum size specified by the MinTreeSize command line option.
if (VectorizableTree.size() >= MinTreeSize)
@ -2465,8 +2505,7 @@ void BoUpSLP::reorderInputsAccordingToOpcode(unsigned Opcode,
ArrayRef<Value *> VL,
SmallVectorImpl<Value *> &Left,
SmallVectorImpl<Value *> &Right) {
if (!VL.empty()) {
// Peel the first iteration out of the loop since there's nothing
// interesting to do anyway and it simplifies the checks in the loop.
auto *I = cast<Instruction>(VL[0]);
@ -2556,14 +2595,13 @@ void BoUpSLP::reorderInputsAccordingToOpcode(unsigned Opcode,
}
void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL, Value *OpValue) {
// Get the basic block this bundle is in. All instructions in the bundle
// should be in this block.
auto *Front = cast<Instruction>(OpValue);
auto *BB = Front->getParent();
const unsigned Opcode = cast<Instruction>(OpValue)->getOpcode();
const unsigned AltOpcode = getAltOpcode(Opcode);
assert(llvm::all_of(make_range(VL.begin(), VL.end()), [=](Value *V) -> bool {
return !sameOpcodeOrAlt(Opcode, AltOpcode,
cast<Instruction>(V)->getOpcode()) ||
cast<Instruction>(V)->getParent() == BB;
@ -3082,7 +3120,6 @@ Value *BoUpSLP::vectorizeTree() {
Value *
BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
// All blocks must be scheduled before any instructions are inserted.
for (auto &BSIter : BlocksSchedules) {
scheduleBlock(BSIter.second.get());
@ -3482,7 +3519,7 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
BasicBlock::reverse_iterator UpperEnd = BB->rend();
BasicBlock::iterator DownIter = ScheduleEnd->getIterator();
BasicBlock::iterator LowerEnd = BB->end();
while (true) {
if (++ScheduleRegionSize > ScheduleRegionSizeLimit) {
DEBUG(dbgs() << "SLP: exceeded schedule region size limit\n");
return false;
@ -3696,7 +3733,6 @@ void BoUpSLP::BlockScheduling::resetSchedule() {
}
void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
if (!BS->ScheduleStart)
return;
@ -3828,7 +3864,6 @@ unsigned BoUpSLP::getVectorElementSize(Value *V) {
static bool collectValuesToDemote(Value *V, SmallPtrSetImpl<Value *> &Expr,
SmallVectorImpl<Value *> &ToDemote,
SmallVectorImpl<Value *> &Roots) {
// We can always demote constants.
if (isa<Constant>(V)) {
ToDemote.push_back(V);
@ -3971,7 +4006,7 @@ void BoUpSLP::computeMinimumValueSizes() {
// Determine if the sign bit of all the roots is known to be zero. If not,
// IsKnownPositive is set to False.
IsKnownPositive = llvm::all_of(TreeRoot, [&](Value *R) {
KnownBits Known = computeKnownBits(R, *DL);
return Known.isNonNegative();
});
@ -3979,7 +4014,7 @@ void BoUpSLP::computeMinimumValueSizes() {
// Determine the maximum number of bits required to store the scalar
// values.
for (auto *Scalar : ToDemote) {
auto NumSignBits = ComputeNumSignBits(Scalar, *DL, 0, AC, nullptr, DT);
auto NumTypeBits = DL->getTypeSizeInBits(Scalar->getType());
MaxBitWidth = std::max<unsigned>(NumTypeBits - NumSignBits, MaxBitWidth);
}
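// Editor's note -- worked example with illustrative numbers: for an i32
// scalar that is known to have 25 sign bits, NumTypeBits - NumSignBits is
// 32 - 25 = 7, so this scalar needs at least 7 significant bits and
// MaxBitWidth becomes max(7, MaxBitWidth).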
@ -4024,6 +4059,7 @@ void BoUpSLP::computeMinimumValueSizes() {
}
namespace {
/// The SLPVectorizer Pass.
struct SLPVectorizer : public FunctionPass {
SLPVectorizerPass Impl;
@ -4035,7 +4071,6 @@ struct SLPVectorizer : public FunctionPass {
initializeSLPVectorizerPass(*PassRegistry::getPassRegistry());
}
bool doInitialization(Module &M) override {
return false;
}
@ -4075,6 +4110,7 @@ struct SLPVectorizer : public FunctionPass {
AU.setPreservesCFG();
}
};
} // end anonymous namespace
PreservedAnalyses SLPVectorizerPass::run(Function &F, FunctionAnalysisManager &AM) {
@ -4221,7 +4257,9 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");
if (Cost < -SLPCostThreshold) {
DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
using namespace ore;
R.getORE()->emit(OptimizationRemark(SV_NAME, "StoresVectorized",
cast<StoreInst>(Chain[i]))
<< "Stores SLP vectorized with cost " << NV("Cost", Cost)
@ -4310,7 +4348,6 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
}
void SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) {
// Initialize the collections. We will make a single pass over the block.
Stores.clear();
GEPs.clear();
@ -4319,7 +4356,6 @@ void SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) {
// Stores and GEPs according to the underlying objects of their pointer
// operands.
for (Instruction &I : *BB) {
// Ignore store instructions that are volatile or have a pointer operand
// that doesn't point to a scalar type.
if (auto *SI = dyn_cast<StoreInst>(&I)) {
@ -4557,6 +4593,7 @@ static Value *createRdxShuffleMask(unsigned VecLen, unsigned NumEltsToRdx,
}
namespace {
/// Model horizontal reductions.
///
/// A horizontal reduction is a tree of reduction operations (currently add and
@ -4594,10 +4631,13 @@ class HorizontalReduction {
struct OperationData {
/// true if the operation is a reduced value, false if reduction operation.
bool IsReducedValue = false;
/// Opcode of the instruction.
unsigned Opcode = 0;
/// Left operand of the reduction operation.
Value *LHS = nullptr;
/// Right operand of the reduction operation.
Value *RHS = nullptr;
@ -4610,40 +4650,48 @@ class HorizontalReduction {
public:
explicit OperationData() = default;
/// Constructor for reduced values. They are identified by opcode only and
/// don't have associated LHS/RHS values.
explicit OperationData(Value *V) : IsReducedValue(true) {
if (auto *I = dyn_cast<Instruction>(V))
Opcode = I->getOpcode();
}
/// Constructor for binary reduction operations with opcode and its left and
/// right operands.
OperationData(unsigned Opcode, Value *LHS, Value *RHS)
: Opcode(Opcode), LHS(LHS), RHS(RHS) {}
explicit operator bool() const { return Opcode; }
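// Editor's note -- illustrative sketch of the `explicit operator bool` idiom
// used above (hypothetical type, not part of this patch):
struct MaybeOpcodeSketch {
  explicit MaybeOpcodeSketch(unsigned Op) : Opcode(Op) {}
  // Usable in boolean contexts: `if (MaybeOpcodeSketch(5))` compiles, while
  // `int I = MaybeOpcodeSketch(5);` is rejected because the conversion is
  // explicit.
  explicit operator bool() const { return Opcode != 0; }
  unsigned Opcode;
};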
/// Get the index of the first operand.
unsigned getFirstOperandIndex() const {
assert(!!*this && "The opcode is not set.");
return 0;
}
/// Total number of operands in the reduction operation.
unsigned getNumberOfOperands() const {
assert(!IsReducedValue && !!*this && LHS && RHS &&
"Expected reduction operation.");
return 2;
}
/// Expected number of uses for reduction operations/reduced values.
unsigned getRequiredNumberOfUses() const {
assert(!IsReducedValue && !!*this && LHS && RHS &&
"Expected reduction operation.");
return 1;
}
/// Checks if instruction is associative and can be vectorized.
bool isAssociative(Instruction *I) const {
assert(!IsReducedValue && *this && LHS && RHS &&
"Expected reduction operation.");
return I->isAssociative();
}
/// Checks if the reduction operation can be vectorized.
bool isVectorizable(Instruction *I) const {
return isVectorizable() && isAssociative(I);
@ -4665,13 +4713,16 @@ class HorizontalReduction {
LHS = nullptr;
RHS = nullptr;
}
/// Get the opcode of the reduction operation.
unsigned getOpcode() const {
assert(isVectorizable() && "Expected vectorizable operation.");
return Opcode;
}
Value *getLHS() const { return LHS; }
Value *getRHS() const { return RHS; }
/// Creates reduction operation with the current opcode.
Value *createOp(IRBuilder<> &Builder, const Twine &Name = "") const {
assert(!IsReducedValue &&
@ -4686,8 +4737,10 @@ class HorizontalReduction {
/// The operation data of the reduction operation.
OperationData ReductionData;
/// The operation data of the values we perform a reduction on.
OperationData ReducedValueData;
/// Should we model this reduction as a pairwise reduction tree or a tree that
/// splits the vector in halves and adds those halves.
bool IsPairwiseReduction = false;
@ -5018,6 +5071,7 @@ private:
return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
}
};
} // end anonymous namespace
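// Editor's note -- a scalar model (hypothetical helper, not part of this
// patch; assumes <array>) of the non-pairwise reduction emitted above: each
// step adds the upper half of the vector onto the lower half, and the final
// result is read from lane 0, mirroring the CreateExtractElement(TmpVec, 0)
// call.
static int reduceSplitInHalfSketch(std::array<int, 4> Lanes) {
  Lanes[0] += Lanes[2]; // Width 4 -> 2: {a+c, b+d, _, _}.
  Lanes[1] += Lanes[3];
  Lanes[0] += Lanes[1]; // Width 2 -> 1: {a+b+c+d, _, _, _}.
  return Lanes[0];      // Equivalent of extracting element 0.
}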
/// \brief Recognize construction of vectors like
@ -5425,7 +5479,6 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {
auto Changed = false;
for (auto &Entry : GEPs) {
// If the getelementptr list has fewer than two elements, there's nothing
// to do.
if (Entry.second.size() < 2)
@ -5530,7 +5583,9 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
}
char SLPVectorizer::ID = 0;
static const char lv_name[] = "SLP Vectorizer";
INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
@ -5541,6 +5596,4 @@ INITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false)
Pass *llvm::createSLPVectorizerPass() { return new SLPVectorizer(); }
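// Editor's note -- hedged usage sketch (not part of this patch): with the
// legacy pass manager, the factory above is typically consumed as follows:
//
//   legacy::PassManager PM;
//   PM.add(createSLPVectorizerPass());
//   PM.run(M); // M is an llvm::Module.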