mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[Analysis, Transforms] Fix some Clang-tidy modernize and Include What You Use warnings; other minor fixes (NFC).
llvm-svn: 312383
This commit is contained in:
parent
fe8f0ed2ee
commit
cbd8f32d28
@ -1,4 +1,4 @@
|
||||
//===- llvm/Analysis/AssumptionCache.h - Track @llvm.assume ---*- C++ -*-===//
|
||||
//===- llvm/Analysis/AssumptionCache.h - Track @llvm.assume -----*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
@ -18,9 +18,8 @@
|
||||
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/SmallSet.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/ADT/DenseMapInfo.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/IR/PassManager.h"
|
||||
#include "llvm/IR/ValueHandle.h"
|
||||
#include "llvm/Pass.h"
|
||||
@ -28,6 +27,11 @@
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class CallInst;
|
||||
class Function;
|
||||
class raw_ostream;
|
||||
class Value;
|
||||
|
||||
/// \brief A cache of @llvm.assume calls within a function.
|
||||
///
|
||||
/// This cache provides fast lookup of assumptions within a function by caching
|
||||
@ -47,6 +51,7 @@ class AssumptionCache {
|
||||
|
||||
class AffectedValueCallbackVH final : public CallbackVH {
|
||||
AssumptionCache *AC;
|
||||
|
||||
void deleted() override;
|
||||
void allUsesReplacedWith(Value *) override;
|
||||
|
||||
@ -76,7 +81,7 @@ class AssumptionCache {
|
||||
///
|
||||
/// We want to be as lazy about this as possible, and so we scan the function
|
||||
/// at the last moment.
|
||||
bool Scanned;
|
||||
bool Scanned = false;
|
||||
|
||||
/// \brief Scan the function for assumptions and add them to the cache.
|
||||
void scanFunction();
|
||||
@ -84,7 +89,7 @@ class AssumptionCache {
|
||||
public:
|
||||
/// \brief Construct an AssumptionCache from a function by scanning all of
|
||||
/// its instructions.
|
||||
AssumptionCache(Function &F) : F(F), Scanned(false) {}
|
||||
AssumptionCache(Function &F) : F(F) {}
|
||||
|
||||
/// This cache is designed to be self-updating and so it should never be
|
||||
/// invalidated.
|
||||
@ -145,10 +150,11 @@ public:
|
||||
/// assumption caches for a given function.
|
||||
class AssumptionAnalysis : public AnalysisInfoMixin<AssumptionAnalysis> {
|
||||
friend AnalysisInfoMixin<AssumptionAnalysis>;
|
||||
|
||||
static AnalysisKey Key;
|
||||
|
||||
public:
|
||||
typedef AssumptionCache Result;
|
||||
using Result = AssumptionCache;
|
||||
|
||||
AssumptionCache run(Function &F, FunctionAnalysisManager &) {
|
||||
return AssumptionCache(F);
|
||||
@ -161,6 +167,7 @@ class AssumptionPrinterPass : public PassInfoMixin<AssumptionPrinterPass> {
|
||||
|
||||
public:
|
||||
explicit AssumptionPrinterPass(raw_ostream &OS) : OS(OS) {}
|
||||
|
||||
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
|
||||
};
|
||||
|
||||
@ -177,10 +184,11 @@ class AssumptionCacheTracker : public ImmutablePass {
|
||||
/// delete our cache of intrinsics for a function when it is deleted.
|
||||
class FunctionCallbackVH final : public CallbackVH {
|
||||
AssumptionCacheTracker *ACT;
|
||||
|
||||
void deleted() override;
|
||||
|
||||
public:
|
||||
typedef DenseMapInfo<Value *> DMI;
|
||||
using DMI = DenseMapInfo<Value *>;
|
||||
|
||||
FunctionCallbackVH(Value *V, AssumptionCacheTracker *ACT = nullptr)
|
||||
: CallbackVH(V), ACT(ACT) {}
|
||||
@ -188,8 +196,10 @@ class AssumptionCacheTracker : public ImmutablePass {
|
||||
|
||||
friend FunctionCallbackVH;
|
||||
|
||||
typedef DenseMap<FunctionCallbackVH, std::unique_ptr<AssumptionCache>,
|
||||
FunctionCallbackVH::DMI> FunctionCallsMap;
|
||||
using FunctionCallsMap =
|
||||
DenseMap<FunctionCallbackVH, std::unique_ptr<AssumptionCache>,
|
||||
FunctionCallbackVH::DMI>;
|
||||
|
||||
FunctionCallsMap AssumptionCaches;
|
||||
|
||||
public:
|
||||
@ -208,6 +218,7 @@ public:
|
||||
}
|
||||
|
||||
void verifyAnalysis() const override;
|
||||
|
||||
bool doFinalization(Module &) override {
|
||||
verifyAnalysis();
|
||||
return false;
|
||||
@ -218,4 +229,4 @@ public:
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif
|
||||
#endif // LLVM_ANALYSIS_ASSUMPTIONCACHE_H
|
||||
|
@ -15,32 +15,32 @@
|
||||
#ifndef LLVM_ANALYSIS_VALUETRACKING_H
|
||||
#define LLVM_ANALYSIS_VALUETRACKING_H
|
||||
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/Optional.h"
|
||||
#include "llvm/IR/CallSite.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/Instruction.h"
|
||||
#include "llvm/IR/IntrinsicInst.h"
|
||||
#include "llvm/Support/DataTypes.h"
|
||||
#include "llvm/IR/Intrinsics.h"
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
|
||||
namespace llvm {
|
||||
template <typename T> class ArrayRef;
|
||||
class APInt;
|
||||
class AddOperator;
|
||||
class AssumptionCache;
|
||||
class DataLayout;
|
||||
class DominatorTree;
|
||||
class GEPOperator;
|
||||
class Instruction;
|
||||
struct KnownBits;
|
||||
class Loop;
|
||||
class LoopInfo;
|
||||
class OptimizationRemarkEmitter;
|
||||
class MDNode;
|
||||
class StringRef;
|
||||
class TargetLibraryInfo;
|
||||
class Value;
|
||||
|
||||
namespace Intrinsic {
|
||||
enum ID : unsigned;
|
||||
}
|
||||
class AddOperator;
|
||||
class APInt;
|
||||
class AssumptionCache;
|
||||
class DataLayout;
|
||||
class DominatorTree;
|
||||
class GEPOperator;
|
||||
class IntrinsicInst;
|
||||
struct KnownBits;
|
||||
class Loop;
|
||||
class LoopInfo;
|
||||
class MDNode;
|
||||
class OptimizationRemarkEmitter;
|
||||
class StringRef;
|
||||
class TargetLibraryInfo;
|
||||
class Value;
|
||||
|
||||
/// Determine which bits of V are known to be either zero or one and return
|
||||
/// them in the Known.Zero/Known.One bit sets.
|
||||
@ -56,17 +56,20 @@ template <typename T> class ArrayRef;
|
||||
const Instruction *CxtI = nullptr,
|
||||
const DominatorTree *DT = nullptr,
|
||||
OptimizationRemarkEmitter *ORE = nullptr);
|
||||
|
||||
/// Returns the known bits rather than passing by reference.
|
||||
KnownBits computeKnownBits(const Value *V, const DataLayout &DL,
|
||||
unsigned Depth = 0, AssumptionCache *AC = nullptr,
|
||||
const Instruction *CxtI = nullptr,
|
||||
const DominatorTree *DT = nullptr,
|
||||
OptimizationRemarkEmitter *ORE = nullptr);
|
||||
|
||||
/// Compute known bits from the range metadata.
|
||||
/// \p Known.Zero the set of bits that are known to be zero
|
||||
/// \p Known.One the set of bits that are known to be one
|
||||
void computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
|
||||
KnownBits &Known);
|
||||
|
||||
/// Return true if LHS and RHS have no common bits set.
|
||||
bool haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
|
||||
const DataLayout &DL,
|
||||
@ -180,7 +183,6 @@ template <typename T> class ArrayRef;
|
||||
/// -0 --> true
|
||||
/// x > +0 --> true
|
||||
/// x < -0 --> false
|
||||
///
|
||||
bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI);
|
||||
|
||||
/// Return true if we can prove that the specified FP value's sign bit is 0.
|
||||
@ -190,7 +192,6 @@ template <typename T> class ArrayRef;
|
||||
/// -0 --> false
|
||||
/// x > +0 --> true
|
||||
/// x < -0 --> false
|
||||
///
|
||||
bool SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI);
|
||||
|
||||
/// If the specified value can be set by repeating the same byte in memory,
|
||||
@ -231,8 +232,10 @@ template <typename T> class ArrayRef;
|
||||
/// ConstantDataArray pointer. nullptr indicates a zeroinitializer (a valid
|
||||
/// initializer, it just doesn't fit the ConstantDataArray interface).
|
||||
const ConstantDataArray *Array;
|
||||
|
||||
/// Slice starts at this Offset.
|
||||
uint64_t Offset;
|
||||
|
||||
/// Length of the slice.
|
||||
uint64_t Length;
|
||||
|
||||
@ -242,6 +245,7 @@ template <typename T> class ArrayRef;
|
||||
Offset += Delta;
|
||||
Length -= Delta;
|
||||
}
|
||||
|
||||
/// Convenience accessor for elements in the slice.
|
||||
uint64_t operator[](unsigned I) const {
|
||||
return Array==nullptr ? 0 : Array->getElementAsInteger(I + Offset);
|
||||
@ -378,6 +382,7 @@ template <typename T> class ArrayRef;
|
||||
const DominatorTree *DT = nullptr);
|
||||
|
||||
enum class OverflowResult { AlwaysOverflows, MayOverflow, NeverOverflows };
|
||||
|
||||
OverflowResult computeOverflowForUnsignedMul(const Value *LHS,
|
||||
const Value *RHS,
|
||||
const DataLayout &DL,
|
||||
@ -466,6 +471,7 @@ template <typename T> class ArrayRef;
|
||||
SPF_ABS, /// Absolute value
|
||||
SPF_NABS /// Negated absolute value
|
||||
};
|
||||
|
||||
/// \brief Behavior when a floating point min/max is given one NaN and one
|
||||
/// non-NaN as input.
|
||||
enum SelectPatternNaNBehavior {
|
||||
@ -476,6 +482,7 @@ template <typename T> class ArrayRef;
|
||||
/// it has been determined that no operands can
|
||||
/// be NaN).
|
||||
};
|
||||
|
||||
struct SelectPatternResult {
|
||||
SelectPatternFlavor Flavor;
|
||||
SelectPatternNaNBehavior NaNBehavior; /// Only applicable if Flavor is
|
||||
@ -489,6 +496,7 @@ template <typename T> class ArrayRef;
|
||||
return !(SPF == SPF_UNKNOWN || SPF == SPF_ABS || SPF == SPF_NABS);
|
||||
}
|
||||
};
|
||||
|
||||
/// Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind
|
||||
/// and providing the out parameter results if we successfully match.
|
||||
///
|
||||
@ -532,4 +540,4 @@ template <typename T> class ArrayRef;
|
||||
unsigned Depth = 0);
|
||||
} // end namespace llvm
|
||||
|
||||
#endif
|
||||
#endif // LLVM_ANALYSIS_VALUETRACKING_H
|
||||
|
@ -17,15 +17,23 @@
|
||||
#define LLVM_TRANSFORMS_SCALAR_SROA_H
|
||||
|
||||
#include "llvm/ADT/SetVector.h"
|
||||
#include "llvm/Analysis/AssumptionCache.h"
|
||||
#include "llvm/IR/Dominators.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/IR/PassManager.h"
|
||||
#include "llvm/Support/Compiler.h"
|
||||
#include <vector>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class AllocaInst;
|
||||
class AssumptionCache;
|
||||
class DominatorTree;
|
||||
class Function;
|
||||
class Instruction;
|
||||
class LLVMContext;
|
||||
class PHINode;
|
||||
class SelectInst;
|
||||
class Use;
|
||||
|
||||
/// A private "module" namespace for types and utilities used by SROA. These
|
||||
/// are implementation details and should not be used by clients.
|
||||
namespace sroa LLVM_LIBRARY_VISIBILITY {
|
||||
|
@ -1,4 +1,4 @@
|
||||
//===---- SLPVectorizer.h ---------------------------------------*- C++ -*-===//
|
||||
//===- SLPVectorizer.h ------------------------------------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
@ -19,30 +19,48 @@
|
||||
#ifndef LLVM_TRANSFORMS_VECTORIZE_SLPVECTORIZER_H
|
||||
#define LLVM_TRANSFORMS_VECTORIZE_SLPVECTORIZER_H
|
||||
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/MapVector.h"
|
||||
#include "llvm/ADT/None.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Analysis/AssumptionCache.h"
|
||||
#include "llvm/Analysis/DemandedBits.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
|
||||
#include "llvm/Analysis/ScalarEvolution.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/PassManager.h"
|
||||
#include "llvm/IR/ValueHandle.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class AssumptionCache;
|
||||
class BasicBlock;
|
||||
class CmpInst;
|
||||
class DataLayout;
|
||||
class DemandedBits;
|
||||
class DominatorTree;
|
||||
class Function;
|
||||
class InsertElementInst;
|
||||
class InsertValueInst;
|
||||
class Instruction;
|
||||
class LoopInfo;
|
||||
class OptimizationRemarkEmitter;
|
||||
class PHINode;
|
||||
class ScalarEvolution;
|
||||
class StoreInst;
|
||||
class TargetLibraryInfo;
|
||||
class TargetTransformInfo;
|
||||
class Value;
|
||||
|
||||
/// A private "module" namespace for types and utilities used by this pass.
|
||||
/// These are implementation details and should not be used by clients.
|
||||
namespace slpvectorizer {
|
||||
|
||||
class BoUpSLP;
|
||||
}
|
||||
|
||||
} // end namespace slpvectorizer
|
||||
|
||||
struct SLPVectorizerPass : public PassInfoMixin<SLPVectorizerPass> {
|
||||
typedef SmallVector<StoreInst *, 8> StoreList;
|
||||
typedef MapVector<Value *, StoreList> StoreListMap;
|
||||
typedef SmallVector<WeakTrackingVH, 8> WeakTrackingVHList;
|
||||
typedef MapVector<Value *, WeakTrackingVHList> WeakTrackingVHListMap;
|
||||
using StoreList = SmallVector<StoreInst *, 8>;
|
||||
using StoreListMap = MapVector<Value *, StoreList>;
|
||||
using WeakTrackingVHList = SmallVector<WeakTrackingVH, 8>;
|
||||
using WeakTrackingVHListMap = MapVector<Value *, WeakTrackingVHList>;
|
||||
|
||||
ScalarEvolution *SE = nullptr;
|
||||
TargetTransformInfo *TTI = nullptr;
|
||||
@ -103,11 +121,14 @@ private:
|
||||
/// Try to vectorize trees that start at insertvalue instructions.
|
||||
bool vectorizeInsertValueInst(InsertValueInst *IVI, BasicBlock *BB,
|
||||
slpvectorizer::BoUpSLP &R);
|
||||
|
||||
/// Try to vectorize trees that start at insertelement instructions.
|
||||
bool vectorizeInsertElementInst(InsertElementInst *IEI, BasicBlock *BB,
|
||||
slpvectorizer::BoUpSLP &R);
|
||||
|
||||
/// Try to vectorize trees that start at compare instructions.
|
||||
bool vectorizeCmpInst(CmpInst *CI, BasicBlock *BB, slpvectorizer::BoUpSLP &R);
|
||||
|
||||
/// Tries to vectorize constructs started from CmpInst, InsertValueInst or
|
||||
/// InsertElementInst instructions.
|
||||
bool vectorizeSimpleInstructions(SmallVectorImpl<WeakVH> &Instructions,
|
||||
@ -128,6 +149,7 @@ private:
|
||||
/// The getelementptr instructions in a basic block organized by base pointer.
|
||||
WeakTrackingVHListMap GEPs;
|
||||
};
|
||||
}
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_TRANSFORMS_VECTORIZE_SLPVECTORIZER_H
|
||||
|
@ -13,14 +13,26 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Analysis/AssumptionCache.h"
|
||||
#include "llvm/IR/CallSite.h"
|
||||
#include "llvm/IR/Dominators.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/IR/BasicBlock.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/InstrTypes.h"
|
||||
#include "llvm/IR/Instruction.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/IntrinsicInst.h"
|
||||
#include "llvm/IR/Intrinsics.h"
|
||||
#include "llvm/IR/PassManager.h"
|
||||
#include "llvm/IR/PatternMatch.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Support/Casting.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <utility>
|
||||
|
||||
using namespace llvm;
|
||||
using namespace llvm::PatternMatch;
|
||||
|
||||
@ -255,8 +267,9 @@ AssumptionCacheTracker::AssumptionCacheTracker() : ImmutablePass(ID) {
|
||||
initializeAssumptionCacheTrackerPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
AssumptionCacheTracker::~AssumptionCacheTracker() {}
|
||||
AssumptionCacheTracker::~AssumptionCacheTracker() = default;
|
||||
|
||||
char AssumptionCacheTracker::ID = 0;
|
||||
|
||||
INITIALIZE_PASS(AssumptionCacheTracker, "assumption-cache-tracker",
|
||||
"Assumption Cache Tracker", false, true)
|
||||
char AssumptionCacheTracker::ID = 0;
|
||||
|
@ -13,37 +13,66 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Analysis/ValueTracking.h"
|
||||
#include "llvm/ADT/APFloat.h"
|
||||
#include "llvm/ADT/APInt.h"
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/None.h"
|
||||
#include "llvm/ADT/Optional.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/ADT/SmallSet.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/ADT/iterator_range.h"
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Analysis/AssumptionCache.h"
|
||||
#include "llvm/Analysis/InstructionSimplify.h"
|
||||
#include "llvm/Analysis/Loads.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Analysis/MemoryBuiltins.h"
|
||||
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
|
||||
#include "llvm/Analysis/VectorUtils.h"
|
||||
#include "llvm/Analysis/TargetLibraryInfo.h"
|
||||
#include "llvm/IR/Argument.h"
|
||||
#include "llvm/IR/Attributes.h"
|
||||
#include "llvm/IR/BasicBlock.h"
|
||||
#include "llvm/IR/CallSite.h"
|
||||
#include "llvm/IR/Constant.h"
|
||||
#include "llvm/IR/ConstantRange.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
#include "llvm/IR/DerivedTypes.h"
|
||||
#include "llvm/IR/DiagnosticInfo.h"
|
||||
#include "llvm/IR/Dominators.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/GetElementPtrTypeIterator.h"
|
||||
#include "llvm/IR/GlobalAlias.h"
|
||||
#include "llvm/IR/GlobalValue.h"
|
||||
#include "llvm/IR/GlobalVariable.h"
|
||||
#include "llvm/IR/InstrTypes.h"
|
||||
#include "llvm/IR/Instruction.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/IntrinsicInst.h"
|
||||
#include "llvm/IR/Intrinsics.h"
|
||||
#include "llvm/IR/LLVMContext.h"
|
||||
#include "llvm/IR/Metadata.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/IR/Operator.h"
|
||||
#include "llvm/IR/PatternMatch.h"
|
||||
#include "llvm/IR/Statepoint.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/IR/Type.h"
|
||||
#include "llvm/IR/User.h"
|
||||
#include "llvm/IR/Value.h"
|
||||
#include "llvm/Support/Casting.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Compiler.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/KnownBits.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <iterator>
|
||||
#include <utility>
|
||||
|
||||
using namespace llvm;
|
||||
using namespace llvm::PatternMatch;
|
||||
|
||||
@ -70,6 +99,7 @@ static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
// Simplifying using an assume can only be done in a particular control-flow
|
||||
// context (the context instruction provides that context). If an assume and
|
||||
// the context instruction are not in the same block then the DT helps in
|
||||
@ -79,6 +109,7 @@ struct Query {
|
||||
AssumptionCache *AC;
|
||||
const Instruction *CxtI;
|
||||
const DominatorTree *DT;
|
||||
|
||||
// Unlike the other analyses, this may be a nullptr because not all clients
|
||||
// provide it currently.
|
||||
OptimizationRemarkEmitter *ORE;
|
||||
@ -92,11 +123,12 @@ struct Query {
|
||||
/// isKnownNonZero, which calls computeKnownBits and isKnownToBeAPowerOfTwo
|
||||
/// (all of which can call computeKnownBits), and so on.
|
||||
std::array<const Value *, MaxDepth> Excluded;
|
||||
unsigned NumExcluded;
|
||||
|
||||
unsigned NumExcluded = 0;
|
||||
|
||||
Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI,
|
||||
const DominatorTree *DT, OptimizationRemarkEmitter *ORE = nullptr)
|
||||
: DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), NumExcluded(0) {}
|
||||
: DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE) {}
|
||||
|
||||
Query(const Query &Q, const Value *NewExcl)
|
||||
: DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), ORE(Q.ORE),
|
||||
@ -113,6 +145,7 @@ struct Query {
|
||||
return std::find(Excluded.begin(), End, Value) != End;
|
||||
}
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
// Given the provided Value and, potentially, a context instruction, return
|
||||
@ -171,7 +204,6 @@ bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
|
||||
return (LHSKnown.Zero | RHSKnown.Zero).isAllOnesValue();
|
||||
}
|
||||
|
||||
|
||||
bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI) {
|
||||
for (const User *U : CxtI->users()) {
|
||||
if (const ICmpInst *IC = dyn_cast<ICmpInst>(U))
|
||||
@ -380,7 +412,9 @@ static bool isEphemeralValueOf(const Instruction *I, const Value *E) {
|
||||
continue;
|
||||
|
||||
// If all uses of this value are ephemeral, then so is this value.
|
||||
if (all_of(V->users(), [&](const User *U) { return EphValues.count(U); })) {
|
||||
if (llvm::all_of(V->users(), [&](const User *U) {
|
||||
return EphValues.count(U);
|
||||
})) {
|
||||
if (V == E)
|
||||
return true;
|
||||
|
||||
@ -423,7 +457,6 @@ static bool isAssumeLikeIntrinsic(const Instruction *I) {
|
||||
bool llvm::isValidAssumeForContext(const Instruction *Inv,
|
||||
const Instruction *CxtI,
|
||||
const DominatorTree *DT) {
|
||||
|
||||
// There are two restrictions on the use of an assume:
|
||||
// 1. The assume must dominate the context (or the control flow must
|
||||
// reach the assume whenever it reaches the context).
|
||||
@ -891,7 +924,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Instruction::Or: {
|
||||
case Instruction::Or:
|
||||
computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);
|
||||
computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
|
||||
|
||||
@ -900,7 +933,6 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
|
||||
// Output known-1 are known to be set if set in either the LHS | RHS.
|
||||
Known.One |= Known2.One;
|
||||
break;
|
||||
}
|
||||
case Instruction::Xor: {
|
||||
computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);
|
||||
computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
|
||||
@ -1911,7 +1943,7 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
|
||||
}
|
||||
}
|
||||
// Check if all incoming values are non-zero constant.
|
||||
bool AllNonZeroConstants = all_of(PN->operands(), [](Value *V) {
|
||||
bool AllNonZeroConstants = llvm::all_of(PN->operands(), [](Value *V) {
|
||||
return isa<ConstantInt>(V) && !cast<ConstantInt>(V)->isZero();
|
||||
});
|
||||
if (AllNonZeroConstants)
|
||||
@ -2494,7 +2526,6 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(ImmutableCallSite ICS,
|
||||
///
|
||||
/// NOTE: this function will need to be revisited when we support non-default
|
||||
/// rounding modes!
|
||||
///
|
||||
bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI,
|
||||
unsigned Depth) {
|
||||
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
|
||||
@ -2723,7 +2754,6 @@ Value *llvm::isBytewiseValue(Value *V) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
||||
// This is the recursive version of BuildSubAggregate. It takes a few different
|
||||
// arguments. Idxs is the index within the nested struct From that we are
|
||||
// looking at now (which is of type IndexedType). IdxSkip is the number of
|
||||
@ -2734,7 +2764,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType,
|
||||
SmallVectorImpl<unsigned> &Idxs,
|
||||
unsigned IdxSkip,
|
||||
Instruction *InsertBefore) {
|
||||
llvm::StructType *STy = dyn_cast<llvm::StructType>(IndexedType);
|
||||
StructType *STy = dyn_cast<StructType>(IndexedType);
|
||||
if (STy) {
|
||||
// Save the original To argument so we can modify it
|
||||
Value *OrigTo = To;
|
||||
@ -2773,8 +2803,8 @@ static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType,
|
||||
return nullptr;
|
||||
|
||||
// Insert the value in the new (sub) aggregate
|
||||
return llvm::InsertValueInst::Create(To, V, makeArrayRef(Idxs).slice(IdxSkip),
|
||||
"tmp", InsertBefore);
|
||||
return InsertValueInst::Create(To, V, makeArrayRef(Idxs).slice(IdxSkip),
|
||||
"tmp", InsertBefore);
|
||||
}
|
||||
|
||||
// This helper takes a nested struct and extracts a part of it (which is again a
|
||||
@ -3745,7 +3775,7 @@ bool llvm::isOverflowIntrinsicNoWrap(const IntrinsicInst *II,
|
||||
return true;
|
||||
};
|
||||
|
||||
return any_of(GuardingBranches, AllUsesGuardedByBranch);
|
||||
return llvm::any_of(GuardingBranches, AllUsesGuardedByBranch);
|
||||
}
|
||||
|
||||
|
||||
@ -3949,7 +3979,7 @@ bool llvm::programUndefinedIfFullPoison(const Instruction *PoisonI) {
|
||||
}
|
||||
|
||||
break;
|
||||
};
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -12,37 +12,67 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/DenseSet.h"
|
||||
#include "llvm/ADT/MapVector.h"
|
||||
#include "llvm/ADT/SetOperations.h"
|
||||
#include "llvm/ADT/None.h"
|
||||
#include "llvm/ADT/Optional.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ADT/SetVector.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/ADT/SmallSet.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/Analysis/CFG.h"
|
||||
#include "llvm/ADT/iterator_range.h"
|
||||
#include "llvm/Analysis/TargetLibraryInfo.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/IR/Argument.h"
|
||||
#include "llvm/IR/Attributes.h"
|
||||
#include "llvm/IR/BasicBlock.h"
|
||||
#include "llvm/IR/CallSite.h"
|
||||
#include "llvm/IR/CallingConv.h"
|
||||
#include "llvm/IR/Constant.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
#include "llvm/IR/DerivedTypes.h"
|
||||
#include "llvm/IR/Dominators.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/InstIterator.h"
|
||||
#include "llvm/IR/InstrTypes.h"
|
||||
#include "llvm/IR/Instruction.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/IntrinsicInst.h"
|
||||
#include "llvm/IR/Intrinsics.h"
|
||||
#include "llvm/IR/LLVMContext.h"
|
||||
#include "llvm/IR/MDBuilder.h"
|
||||
#include "llvm/IR/Metadata.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/IR/Statepoint.h"
|
||||
#include "llvm/IR/Type.h"
|
||||
#include "llvm/IR/User.h"
|
||||
#include "llvm/IR/Value.h"
|
||||
#include "llvm/IR/Verifier.h"
|
||||
#include "llvm/IR/ValueHandle.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Support/Casting.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Compiler.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
||||
#include "llvm/Transforms/Utils/Cloning.h"
|
||||
#include "llvm/Transforms/Utils/Local.h"
|
||||
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <iterator>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#define DEBUG_TYPE "rewrite-statepoints-for-gc"
|
||||
|
||||
@ -53,6 +83,7 @@ static cl::opt<bool> PrintLiveSet("spp-print-liveset", cl::Hidden,
|
||||
cl::init(false));
|
||||
static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size", cl::Hidden,
|
||||
cl::init(false));
|
||||
|
||||
// Print out the base pointers for debugging
|
||||
static cl::opt<bool> PrintBasePointers("spp-print-base-pointers", cl::Hidden,
|
||||
cl::init(false));
|
||||
@ -68,6 +99,7 @@ static bool ClobberNonLive = true;
|
||||
#else
|
||||
static bool ClobberNonLive = false;
|
||||
#endif
|
||||
|
||||
static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live",
|
||||
cl::location(ClobberNonLive),
|
||||
cl::Hidden);
|
||||
@ -77,13 +109,16 @@ static cl::opt<bool>
|
||||
cl::Hidden, cl::init(true));
|
||||
|
||||
namespace {
|
||||
|
||||
struct RewriteStatepointsForGC : public ModulePass {
|
||||
static char ID; // Pass identification, replacement for typeid
|
||||
|
||||
RewriteStatepointsForGC() : ModulePass(ID) {
|
||||
initializeRewriteStatepointsForGCPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
bool runOnFunction(Function &F);
|
||||
|
||||
bool runOnModule(Module &M) override {
|
||||
bool Changed = false;
|
||||
for (Function &F : M)
|
||||
@ -121,12 +156,14 @@ struct RewriteStatepointsForGC : public ModulePass {
|
||||
// Helpers for stripNonValidAttributesAndMetadata
|
||||
void stripNonValidAttributesAndMetadataFromBody(Function &F);
|
||||
void stripNonValidAttributesFromPrototype(Function &F);
|
||||
|
||||
// Certain metadata on instructions are invalid after running RS4GC.
|
||||
// Optimizations that run after RS4GC can incorrectly use this metadata to
|
||||
// optimize functions. We drop such metadata on the instruction.
|
||||
void stripInvalidMetadataFromInstruction(Instruction &I);
|
||||
};
|
||||
} // namespace
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
char RewriteStatepointsForGC::ID = 0;
|
||||
|
||||
@ -142,9 +179,11 @@ INITIALIZE_PASS_END(RewriteStatepointsForGC, "rewrite-statepoints-for-gc",
|
||||
"Make relocations explicit at statepoints", false, false)
|
||||
|
||||
namespace {
|
||||
|
||||
struct GCPtrLivenessData {
|
||||
/// Values defined in this block.
|
||||
MapVector<BasicBlock *, SetVector<Value *>> KillSet;
|
||||
|
||||
/// Values used in this block (and thus live); does not include values
|
||||
/// killed within this block.
|
||||
MapVector<BasicBlock *, SetVector<Value *>> LiveSet;
|
||||
@ -168,10 +207,10 @@ struct GCPtrLivenessData {
|
||||
// Generally, after the execution of a full findBasePointer call, only the
|
||||
// base relation will remain. Internally, we add a mixture of the two
|
||||
// types, then update all the second type to the first type
|
||||
typedef MapVector<Value *, Value *> DefiningValueMapTy;
|
||||
typedef SetVector<Value *> StatepointLiveSetTy;
|
||||
typedef MapVector<AssertingVH<Instruction>, AssertingVH<Value>>
|
||||
RematerializedValueMapTy;
|
||||
using DefiningValueMapTy = MapVector<Value *, Value *>;
|
||||
using StatepointLiveSetTy = SetVector<Value *>;
|
||||
using RematerializedValueMapTy =
|
||||
MapVector<AssertingVH<Instruction>, AssertingVH<Value>>;
|
||||
|
||||
struct PartiallyConstructedSafepointRecord {
|
||||
/// The set of values known to be live across this safepoint
|
||||
@ -193,7 +232,8 @@ struct PartiallyConstructedSafepointRecord {
|
||||
/// Maps rematerialized copy to its original value.
|
||||
RematerializedValueMapTy RematerializedValues;
|
||||
};
|
||||
}
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
static ArrayRef<Use> GetDeoptBundleOperands(ImmutableCallSite CS) {
|
||||
Optional<OperandBundleUse> DeoptBundle =
|
||||
@ -256,7 +296,7 @@ static bool containsGCPtrType(Type *Ty) {
|
||||
if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
|
||||
return containsGCPtrType(AT->getElementType());
|
||||
if (StructType *ST = dyn_cast<StructType>(Ty))
|
||||
return any_of(ST->subtypes(), containsGCPtrType);
|
||||
return llvm::any_of(ST->subtypes(), containsGCPtrType);
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -301,7 +341,9 @@ analyzeParsePointLiveness(DominatorTree &DT,
|
||||
}
|
||||
|
||||
static bool isKnownBaseResult(Value *V);
|
||||
|
||||
namespace {
|
||||
|
||||
/// A single base defining value - An immediate base defining value for an
|
||||
/// instruction 'Def' is an input to 'Def' whose base is also a base of 'Def'.
|
||||
/// For instructions which have multiple pointer [vector] inputs or that
|
||||
@ -313,9 +355,11 @@ namespace {
|
||||
struct BaseDefiningValueResult {
|
||||
/// Contains the value which is the base defining value.
|
||||
Value * const BDV;
|
||||
|
||||
/// True if the base defining value is also known to be an actual base
|
||||
/// pointer.
|
||||
const bool IsKnownBase;
|
||||
|
||||
BaseDefiningValueResult(Value *BDV, bool IsKnownBase)
|
||||
: BDV(BDV), IsKnownBase(IsKnownBase) {
|
||||
#ifndef NDEBUG
|
||||
@ -326,7 +370,8 @@ struct BaseDefiningValueResult {
|
||||
#endif
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
static BaseDefiningValueResult findBaseDefiningValue(Value *I);
|
||||
|
||||
@ -431,7 +476,6 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
|
||||
if (isa<LoadInst>(I))
|
||||
// The value loaded is a gc base itself
|
||||
return BaseDefiningValueResult(I, true);
|
||||
|
||||
|
||||
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
|
||||
// The base of this GEP is the base
|
||||
@ -444,12 +488,11 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
|
||||
break;
|
||||
case Intrinsic::experimental_gc_statepoint:
|
||||
llvm_unreachable("statepoints don't produce pointers");
|
||||
case Intrinsic::experimental_gc_relocate: {
|
||||
case Intrinsic::experimental_gc_relocate:
|
||||
// Rerunning safepoint insertion after safepoints are already
|
||||
// inserted is not supported. It could probably be made to work,
|
||||
// but why are you doing this? There's no good reason.
|
||||
llvm_unreachable("repeat safepoint insertion is not supported");
|
||||
}
|
||||
case Intrinsic::gcroot:
|
||||
// Currently, this mechanism hasn't been extended to work with gcroot.
|
||||
// There's no reason it couldn't be, but I haven't thought about the
|
||||
@ -553,6 +596,7 @@ static bool isKnownBaseResult(Value *V) {
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
/// Models the state of a single base defining value in the findBasePointer
|
||||
/// algorithm for determining where a new instruction is needed to propagate
|
||||
/// the base of this BDV.
|
||||
@ -560,7 +604,7 @@ class BDVState {
|
||||
public:
|
||||
enum Status { Unknown, Base, Conflict };
|
||||
|
||||
BDVState() : Status(Unknown), BaseValue(nullptr) {}
|
||||
BDVState() : BaseValue(nullptr) {}
|
||||
|
||||
explicit BDVState(Status Status, Value *BaseValue = nullptr)
|
||||
: Status(Status), BaseValue(BaseValue) {
|
||||
@ -599,16 +643,17 @@ public:
|
||||
case Conflict:
|
||||
OS << "C";
|
||||
break;
|
||||
};
|
||||
}
|
||||
OS << " (" << getBaseValue() << " - "
|
||||
<< (getBaseValue() ? getBaseValue()->getName() : "nullptr") << "): ";
|
||||
}
|
||||
|
||||
private:
|
||||
Status Status;
|
||||
Status Status = Unknown;
|
||||
AssertingVH<Value> BaseValue; // Non-null only if Status == Base.
|
||||
};
|
||||
}
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
#ifndef NDEBUG
|
||||
static raw_ostream &operator<<(raw_ostream &OS, const BDVState &State) {
|
||||
@ -1171,7 +1216,7 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
|
||||
return;
|
||||
|
||||
auto FindIndex = [](ArrayRef<Value *> LiveVec, Value *Val) {
|
||||
auto ValIt = find(LiveVec, Val);
|
||||
auto ValIt = llvm::find(LiveVec, Val);
|
||||
assert(ValIt != LiveVec.end() && "Val not found in LiveVec!");
|
||||
size_t Index = std::distance(LiveVec.begin(), ValIt);
|
||||
assert(Index < LiveVec.size() && "Bug in std::find?");
|
||||
@ -1231,7 +1276,7 @@ class DeferredReplacement {
|
||||
AssertingVH<Instruction> New;
|
||||
bool IsDeoptimize = false;
|
||||
|
||||
DeferredReplacement() {}
|
||||
DeferredReplacement() = default;
|
||||
|
||||
public:
|
||||
static DeferredReplacement createRAUW(Instruction *Old, Instruction *New) {
|
||||
@ -1288,7 +1333,8 @@ public:
|
||||
OldI->eraseFromParent();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
static StringRef getDeoptLowering(CallSite CS) {
|
||||
const char *DeoptLowering = "deopt-lowering";
|
||||
@ -1306,7 +1352,6 @@ static StringRef getDeoptLowering(CallSite CS) {
|
||||
return "live-through";
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
makeStatepointExplicitImpl(const CallSite CS, /* to replace */
|
||||
const SmallVectorImpl<Value *> &BasePtrs,
|
||||
@ -1530,7 +1575,6 @@ static void
|
||||
insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
|
||||
DenseMap<Value *, Value *> &AllocaMap,
|
||||
DenseSet<Value *> &VisitedLiveValues) {
|
||||
|
||||
for (User *U : GCRelocs) {
|
||||
GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U);
|
||||
if (!Relocate)
|
||||
@ -1566,7 +1610,6 @@ static void insertRematerializationStores(
|
||||
const RematerializedValueMapTy &RematerializedValues,
|
||||
DenseMap<Value *, Value *> &AllocaMap,
|
||||
DenseSet<Value *> &VisitedLiveValues) {
|
||||
|
||||
for (auto RematerializedValuePair: RematerializedValues) {
|
||||
Instruction *RematerializedValue = RematerializedValuePair.first;
|
||||
Value *OriginalValue = RematerializedValuePair.second;
|
||||
@ -1832,7 +1875,6 @@ static void findLiveReferences(
|
||||
static Value* findRematerializableChainToBasePointer(
|
||||
SmallVectorImpl<Instruction*> &ChainToBase,
|
||||
Value *CurrentValue) {
|
||||
|
||||
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurrentValue)) {
|
||||
ChainToBase.push_back(GEP);
|
||||
return findRematerializableChainToBasePointer(ChainToBase,
|
||||
@ -1888,7 +1930,6 @@ chainToBasePointerCost(SmallVectorImpl<Instruction*> &Chain,
|
||||
}
|
||||
|
||||
static bool AreEquivalentPhiNodes(PHINode &OrigRootPhi, PHINode &AlternateRootPhi) {
|
||||
|
||||
unsigned PhiNum = OrigRootPhi.getNumIncomingValues();
|
||||
if (PhiNum != AlternateRootPhi.getNumIncomingValues() ||
|
||||
OrigRootPhi.getParent() != AlternateRootPhi.getParent())
|
||||
@ -1912,7 +1953,6 @@ static bool AreEquivalentPhiNodes(PHINode &OrigRootPhi, PHINode &AlternateRootPh
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
||||
}
|
||||
|
||||
// From the statepoint live set pick values that are cheaper to recompute than
|
||||
@ -2313,7 +2353,6 @@ RewriteStatepointsForGC::stripNonValidAttributesFromPrototype(Function &F) {
|
||||
}
|
||||
|
||||
void RewriteStatepointsForGC::stripInvalidMetadataFromInstruction(Instruction &I) {
|
||||
|
||||
if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
|
||||
return;
|
||||
// These are the attributes that are still valid on loads and stores after
|
||||
@ -2339,7 +2378,6 @@ void RewriteStatepointsForGC::stripInvalidMetadataFromInstruction(Instruction &I
|
||||
|
||||
// Drops all metadata on the instruction other than ValidMetadataAfterRS4GC.
|
||||
I.dropUnknownNonDebugMetadata(ValidMetadataAfterRS4GC);
|
||||
|
||||
}
|
||||
|
||||
void RewriteStatepointsForGC::stripNonValidAttributesAndMetadataFromBody(Function &F) {
|
||||
@ -2349,7 +2387,6 @@ void RewriteStatepointsForGC::stripNonValidAttributesAndMetadataFromBody(Functio
|
||||
LLVMContext &Ctx = F.getContext();
|
||||
MDBuilder Builder(Ctx);
|
||||
|
||||
|
||||
for (Instruction &I : instructions(F)) {
|
||||
if (const MDNode *MD = I.getMetadata(LLVMContext::MD_tbaa)) {
|
||||
assert(MD->getNumOperands() < 5 && "unrecognized metadata shape!");
|
||||
@ -2398,7 +2435,7 @@ static bool shouldRewriteStatepointsIn(Function &F) {
|
||||
|
||||
void RewriteStatepointsForGC::stripNonValidAttributesAndMetadata(Module &M) {
|
||||
#ifndef NDEBUG
|
||||
assert(any_of(M, shouldRewriteStatepointsIn) && "precondition!");
|
||||
assert(llvm::any_of(M, shouldRewriteStatepointsIn) && "precondition!");
|
||||
#endif
|
||||
|
||||
for (Function &F : M)
|
||||
@ -2666,7 +2703,6 @@ static void computeLiveInValues(DominatorTree &DT, Function &F,
|
||||
|
||||
static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
|
||||
StatepointLiveSetTy &Out) {
|
||||
|
||||
BasicBlock *BB = Inst->getParent();
|
||||
|
||||
// Note: The copy is intentional and required
|
||||
|
@ -24,28 +24,53 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Transforms/Scalar/SROA.h"
|
||||
#include "llvm/ADT/APInt.h"
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/PointerIntPair.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ADT/SetVector.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/ADT/Twine.h"
|
||||
#include "llvm/ADT/iterator.h"
|
||||
#include "llvm/ADT/iterator_range.h"
|
||||
#include "llvm/Analysis/AssumptionCache.h"
|
||||
#include "llvm/Analysis/GlobalsModRef.h"
|
||||
#include "llvm/Analysis/Loads.h"
|
||||
#include "llvm/Analysis/PtrUseVisitor.h"
|
||||
#include "llvm/Analysis/ValueTracking.h"
|
||||
#include "llvm/IR/BasicBlock.h"
|
||||
#include "llvm/IR/Constant.h"
|
||||
#include "llvm/IR/ConstantFolder.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/DIBuilder.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
#include "llvm/IR/DebugInfo.h"
|
||||
#include "llvm/IR/DebugInfoMetadata.h"
|
||||
#include "llvm/IR/DerivedTypes.h"
|
||||
#include "llvm/IR/Dominators.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/GetElementPtrTypeIterator.h"
|
||||
#include "llvm/IR/GlobalAlias.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/InstVisitor.h"
|
||||
#include "llvm/IR/InstrTypes.h"
|
||||
#include "llvm/IR/Instruction.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/IntrinsicInst.h"
|
||||
#include "llvm/IR/Intrinsics.h"
|
||||
#include "llvm/IR/LLVMContext.h"
|
||||
#include "llvm/IR/Metadata.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/IR/Operator.h"
|
||||
#include "llvm/IR/PassManager.h"
|
||||
#include "llvm/IR/Type.h"
|
||||
#include "llvm/IR/Use.h"
|
||||
#include "llvm/IR/User.h"
|
||||
#include "llvm/IR/Value.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Support/Chrono.h"
|
||||
#include "llvm/Support/Casting.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Compiler.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
@ -55,6 +80,17 @@
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
#include "llvm/Transforms/Utils/Local.h"
|
||||
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <iterator>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#ifndef NDEBUG
|
||||
// We only use this for a debug check.
|
||||
@ -88,10 +124,12 @@ static cl::opt<bool> SROAStrictInbounds("sroa-strict-inbounds", cl::init(false),
|
||||
cl::Hidden);
|
||||
|
||||
namespace {
|
||||
|
||||
/// \brief A custom IRBuilder inserter which prefixes all names, but only in
|
||||
/// Assert builds.
|
||||
class IRBuilderPrefixedInserter : public IRBuilderDefaultInserter {
|
||||
std::string Prefix;
|
||||
|
||||
const Twine getNameWithPrefix(const Twine &Name) const {
|
||||
return Name.isTriviallyEmpty() ? Name : Prefix + Name;
|
||||
}
|
||||
@ -107,11 +145,9 @@ protected:
|
||||
}
|
||||
};
|
||||
|
||||
/// \brief Provide a typedef for IRBuilder that drops names in release builds.
|
||||
using IRBuilderTy = llvm::IRBuilder<ConstantFolder, IRBuilderPrefixedInserter>;
|
||||
}
|
||||
/// \brief Provide a type for IRBuilder that drops names in release builds.
|
||||
using IRBuilderTy = IRBuilder<ConstantFolder, IRBuilderPrefixedInserter>;
|
||||
|
||||
namespace {
|
||||
/// \brief A used slice of an alloca.
|
||||
///
|
||||
/// This structure represents a slice of an alloca used by some instruction. It
|
||||
@ -120,17 +156,18 @@ namespace {
|
||||
/// or not when forming partitions of the alloca.
|
||||
class Slice {
|
||||
/// \brief The beginning offset of the range.
|
||||
uint64_t BeginOffset;
|
||||
uint64_t BeginOffset = 0;
|
||||
|
||||
/// \brief The ending offset, not included in the range.
|
||||
uint64_t EndOffset;
|
||||
uint64_t EndOffset = 0;
|
||||
|
||||
/// \brief Storage for both the use of this slice and whether it can be
|
||||
/// split.
|
||||
PointerIntPair<Use *, 1, bool> UseAndIsSplittable;
|
||||
|
||||
public:
|
||||
Slice() : BeginOffset(), EndOffset() {}
|
||||
Slice() = default;
|
||||
|
||||
Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable)
|
||||
: BeginOffset(BeginOffset), EndOffset(EndOffset),
|
||||
UseAndIsSplittable(U, IsSplittable) {}
|
||||
@ -180,12 +217,15 @@ public:
|
||||
}
|
||||
bool operator!=(const Slice &RHS) const { return !operator==(RHS); }
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
namespace llvm {
|
||||
|
||||
template <typename T> struct isPodLike;
|
||||
template <> struct isPodLike<Slice> { static const bool value = true; };
|
||||
}
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
/// \brief Representation of the alloca slices.
|
||||
///
|
||||
@ -207,13 +247,15 @@ public:
|
||||
|
||||
/// \brief Support for iterating over the slices.
|
||||
/// @{
|
||||
typedef SmallVectorImpl<Slice>::iterator iterator;
|
||||
typedef iterator_range<iterator> range;
|
||||
using iterator = SmallVectorImpl<Slice>::iterator;
|
||||
using range = iterator_range<iterator>;
|
||||
|
||||
iterator begin() { return Slices.begin(); }
|
||||
iterator end() { return Slices.end(); }
|
||||
|
||||
typedef SmallVectorImpl<Slice>::const_iterator const_iterator;
|
||||
typedef iterator_range<const_iterator> const_range;
|
||||
using const_iterator = SmallVectorImpl<Slice>::const_iterator;
|
||||
using const_range = iterator_range<const_iterator>;
|
||||
|
||||
const_iterator begin() const { return Slices.begin(); }
|
||||
const_iterator end() const { return Slices.end(); }
|
||||
/// @}
|
||||
@ -264,6 +306,7 @@ public:
|
||||
private:
|
||||
template <typename DerivedT, typename RetT = void> class BuilderBase;
|
||||
class SliceBuilder;
|
||||
|
||||
friend class AllocaSlices::SliceBuilder;
|
||||
|
||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||||
@ -320,7 +363,7 @@ private:
|
||||
friend class AllocaSlices;
|
||||
friend class AllocaSlices::partition_iterator;
|
||||
|
||||
typedef AllocaSlices::iterator iterator;
|
||||
using iterator = AllocaSlices::iterator;
|
||||
|
||||
/// \brief The beginning and ending offsets of the alloca for this
|
||||
/// partition.
|
||||
@ -403,12 +446,12 @@ class AllocaSlices::partition_iterator
|
||||
|
||||
/// \brief We also need to keep track of the maximum split end offset seen.
|
||||
/// FIXME: Do we really?
|
||||
uint64_t MaxSplitSliceEndOffset;
|
||||
uint64_t MaxSplitSliceEndOffset = 0;
|
||||
|
||||
/// \brief Sets the partition to be empty at given iterator, and sets the
|
||||
/// end iterator.
|
||||
partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)
|
||||
: P(SI), SE(SE), MaxSplitSliceEndOffset(0) {
|
||||
: P(SI), SE(SE) {
|
||||
// If not already at the end, advance our state to form the initial
|
||||
// partition.
|
||||
if (SI != SE)
|
||||
@ -432,19 +475,21 @@ class AllocaSlices::partition_iterator
|
||||
// Remove the uses which have ended in the prior partition. This
|
||||
// cannot change the max split slice end because we just checked that
|
||||
// the prior partition ended prior to that max.
|
||||
P.SplitTails.erase(
|
||||
remove_if(P.SplitTails,
|
||||
[&](Slice *S) { return S->endOffset() <= P.EndOffset; }),
|
||||
P.SplitTails.end());
|
||||
assert(any_of(P.SplitTails,
|
||||
[&](Slice *S) {
|
||||
return S->endOffset() == MaxSplitSliceEndOffset;
|
||||
}) &&
|
||||
P.SplitTails.erase(llvm::remove_if(P.SplitTails,
|
||||
[&](Slice *S) {
|
||||
return S->endOffset() <=
|
||||
P.EndOffset;
|
||||
}),
|
||||
P.SplitTails.end());
|
||||
assert(llvm::any_of(P.SplitTails,
|
||||
[&](Slice *S) {
|
||||
return S->endOffset() == MaxSplitSliceEndOffset;
|
||||
}) &&
|
||||
"Could not find the current max split slice offset!");
|
||||
assert(all_of(P.SplitTails,
|
||||
[&](Slice *S) {
|
||||
return S->endOffset() <= MaxSplitSliceEndOffset;
|
||||
}) &&
|
||||
assert(llvm::all_of(P.SplitTails,
|
||||
[&](Slice *S) {
|
||||
return S->endOffset() <= MaxSplitSliceEndOffset;
|
||||
}) &&
|
||||
"Max split slice end offset is not actually the max!");
|
||||
}
|
||||
}
|
||||
@ -608,7 +653,8 @@ static Value *foldPHINodeOrSelectInst(Instruction &I) {
|
||||
class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
|
||||
friend class PtrUseVisitor<SliceBuilder>;
|
||||
friend class InstVisitor<SliceBuilder>;
|
||||
typedef PtrUseVisitor<SliceBuilder> Base;
|
||||
|
||||
using Base = PtrUseVisitor<SliceBuilder>;
|
||||
|
||||
const uint64_t AllocSize;
|
||||
AllocaSlices &AS;
|
||||
@ -996,8 +1042,9 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
|
||||
return;
|
||||
}
|
||||
|
||||
Slices.erase(remove_if(Slices, [](const Slice &S) { return S.isDead(); }),
|
||||
Slices.end());
|
||||
Slices.erase(
|
||||
llvm::remove_if(Slices, [](const Slice &S) { return S.isDead(); }),
|
||||
Slices.end());
|
||||
|
||||
#ifndef NDEBUG
|
||||
if (SROARandomShuffleSlices) {
|
||||
@ -1820,11 +1867,12 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
|
||||
// do that until all the backends are known to produce good code for all
|
||||
// integer vector types.
|
||||
if (!HaveCommonEltTy) {
|
||||
CandidateTys.erase(remove_if(CandidateTys,
|
||||
[](VectorType *VTy) {
|
||||
return !VTy->getElementType()->isIntegerTy();
|
||||
}),
|
||||
CandidateTys.end());
|
||||
CandidateTys.erase(
|
||||
llvm::remove_if(CandidateTys,
|
||||
[](VectorType *VTy) {
|
||||
return !VTy->getElementType()->isIntegerTy();
|
||||
}),
|
||||
CandidateTys.end());
|
||||
|
||||
// If there were no integer vector types, give up.
|
||||
if (CandidateTys.empty())
|
||||
@ -2151,8 +2199,9 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
|
||||
class llvm::sroa::AllocaSliceRewriter
|
||||
: public InstVisitor<AllocaSliceRewriter, bool> {
|
||||
// Befriend the base class so it can delegate to private visit methods.
|
||||
friend class llvm::InstVisitor<AllocaSliceRewriter, bool>;
|
||||
typedef llvm::InstVisitor<AllocaSliceRewriter, bool> Base;
|
||||
friend class InstVisitor<AllocaSliceRewriter, bool>;
|
||||
|
||||
using Base = InstVisitor<AllocaSliceRewriter, bool>;
|
||||
|
||||
const DataLayout &DL;
|
||||
AllocaSlices &AS;
|
||||
@ -2182,16 +2231,18 @@ class llvm::sroa::AllocaSliceRewriter
|
||||
|
||||
// The original offset of the slice currently being rewritten relative to
|
||||
// the original alloca.
|
||||
uint64_t BeginOffset, EndOffset;
|
||||
uint64_t BeginOffset = 0;
|
||||
uint64_t EndOffset = 0;
|
||||
|
||||
// The new offsets of the slice currently being rewritten relative to the
|
||||
// original alloca.
|
||||
uint64_t NewBeginOffset, NewEndOffset;
|
||||
|
||||
uint64_t SliceSize;
|
||||
bool IsSplittable;
|
||||
bool IsSplit;
|
||||
Use *OldUse;
|
||||
Instruction *OldPtr;
|
||||
bool IsSplittable = false;
|
||||
bool IsSplit = false;
|
||||
Use *OldUse = nullptr;
|
||||
Instruction *OldPtr = nullptr;
|
||||
|
||||
// Track post-rewrite users which are PHI nodes and Selects.
|
||||
SmallSetVector<PHINode *, 8> &PHIUsers;
|
||||
@ -2221,8 +2272,7 @@ public:
|
||||
VecTy(PromotableVecTy),
|
||||
ElementTy(VecTy ? VecTy->getElementType() : nullptr),
|
||||
ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy) / 8 : 0),
|
||||
BeginOffset(), EndOffset(), IsSplittable(), IsSplit(), OldUse(),
|
||||
OldPtr(), PHIUsers(PHIUsers), SelectUsers(SelectUsers),
|
||||
PHIUsers(PHIUsers), SelectUsers(SelectUsers),
|
||||
IRB(NewAI.getContext(), ConstantFolder()) {
|
||||
if (VecTy) {
|
||||
assert((DL.getTypeSizeInBits(ElementTy) % 8) == 0 &&
|
||||
@ -2987,6 +3037,7 @@ private:
|
||||
};
|
||||
|
||||
namespace {
|
||||
|
||||
/// \brief Visitor to rewrite aggregate loads and stores as scalar.
|
||||
///
|
||||
/// This pass aggressively rewrites all aggregate loads and stores on
|
||||
@ -2994,7 +3045,7 @@ namespace {
|
||||
/// with scalar loads and stores.
|
||||
class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
|
||||
// Befriend the base class so it can delegate to private visit methods.
|
||||
friend class llvm::InstVisitor<AggLoadStoreRewriter, bool>;
|
||||
friend class InstVisitor<AggLoadStoreRewriter, bool>;
|
||||
|
||||
/// Queue of pointer uses to analyze and potentially rewrite.
|
||||
SmallVector<Use *, 8> Queue;
|
||||
@ -3037,12 +3088,15 @@ private:
|
||||
protected:
|
||||
/// The builder used to form new instructions.
|
||||
IRBuilderTy IRB;
|
||||
|
||||
/// The indices to be used with insert- or extractvalue to select the
|
||||
/// appropriate value within the aggregate.
|
||||
SmallVector<unsigned, 4> Indices;
|
||||
|
||||
/// The indices to a GEP instruction which will move Ptr to the correct slot
|
||||
/// within the aggregate.
|
||||
SmallVector<Value *, 4> GEPIndices;
|
||||
|
||||
/// The base pointer of the original op, used as a base for GEPing the
|
||||
/// split operations.
|
||||
Value *Ptr;
|
||||
@ -3193,7 +3247,8 @@ private:
|
||||
return false;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
/// \brief Strip aggregate type wrapping.
|
||||
///
|
||||
@ -3485,58 +3540,60 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
|
||||
// match relative to their starting offset. We have to verify this prior to
|
||||
// any rewriting.
|
||||
Stores.erase(
|
||||
remove_if(Stores,
|
||||
[&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
|
||||
// Lookup the load we are storing in our map of split
|
||||
// offsets.
|
||||
auto *LI = cast<LoadInst>(SI->getValueOperand());
|
||||
// If it was completely unsplittable, then we're done,
|
||||
// and this store can't be pre-split.
|
||||
if (UnsplittableLoads.count(LI))
|
||||
return true;
|
||||
llvm::remove_if(Stores,
|
||||
[&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
|
||||
// Lookup the load we are storing in our map of split
|
||||
// offsets.
|
||||
auto *LI = cast<LoadInst>(SI->getValueOperand());
|
||||
// If it was completely unsplittable, then we're done,
|
||||
// and this store can't be pre-split.
|
||||
if (UnsplittableLoads.count(LI))
|
||||
return true;
|
||||
|
||||
auto LoadOffsetsI = SplitOffsetsMap.find(LI);
|
||||
if (LoadOffsetsI == SplitOffsetsMap.end())
|
||||
return false; // Unrelated loads are definitely safe.
|
||||
auto &LoadOffsets = LoadOffsetsI->second;
|
||||
auto LoadOffsetsI = SplitOffsetsMap.find(LI);
|
||||
if (LoadOffsetsI == SplitOffsetsMap.end())
|
||||
return false; // Unrelated loads are definitely safe.
|
||||
auto &LoadOffsets = LoadOffsetsI->second;
|
||||
|
||||
// Now lookup the store's offsets.
|
||||
auto &StoreOffsets = SplitOffsetsMap[SI];
|
||||
// Now lookup the store's offsets.
|
||||
auto &StoreOffsets = SplitOffsetsMap[SI];
|
||||
|
||||
// If the relative offsets of each split in the load and
|
||||
// store match exactly, then we can split them and we
|
||||
// don't need to remove them here.
|
||||
if (LoadOffsets.Splits == StoreOffsets.Splits)
|
||||
return false;
|
||||
// If the relative offsets of each split in the load and
|
||||
// store match exactly, then we can split them and we
|
||||
// don't need to remove them here.
|
||||
if (LoadOffsets.Splits == StoreOffsets.Splits)
|
||||
return false;
|
||||
|
||||
DEBUG(dbgs() << " Mismatched splits for load and store:\n"
|
||||
<< " " << *LI << "\n"
|
||||
<< " " << *SI << "\n");
|
||||
DEBUG(dbgs()
|
||||
<< " Mismatched splits for load and store:\n"
|
||||
<< " " << *LI << "\n"
|
||||
<< " " << *SI << "\n");
|
||||
|
||||
// We've found a store and load that we need to split
|
||||
// with mismatched relative splits. Just give up on them
|
||||
// and remove both instructions from our list of
|
||||
// candidates.
|
||||
UnsplittableLoads.insert(LI);
|
||||
return true;
|
||||
}),
|
||||
// We've found a store and load that we need to split
|
||||
// with mismatched relative splits. Just give up on them
|
||||
// and remove both instructions from our list of
|
||||
// candidates.
|
||||
UnsplittableLoads.insert(LI);
|
||||
return true;
|
||||
}),
|
||||
Stores.end());
|
||||
// Now we have to go *back* through all the stores, because a later store may
|
||||
// have caused an earlier store's load to become unsplittable and if it is
|
||||
// unsplittable for the later store, then we can't rely on it being split in
|
||||
// the earlier store either.
|
||||
Stores.erase(remove_if(Stores,
|
||||
[&UnsplittableLoads](StoreInst *SI) {
|
||||
auto *LI = cast<LoadInst>(SI->getValueOperand());
|
||||
return UnsplittableLoads.count(LI);
|
||||
}),
|
||||
Stores.erase(llvm::remove_if(Stores,
|
||||
[&UnsplittableLoads](StoreInst *SI) {
|
||||
auto *LI =
|
||||
cast<LoadInst>(SI->getValueOperand());
|
||||
return UnsplittableLoads.count(LI);
|
||||
}),
|
||||
Stores.end());
|
||||
// Once we've established all the loads that can't be split for some reason,
|
||||
// filter any that made it into our list out.
|
||||
Loads.erase(remove_if(Loads,
|
||||
[&UnsplittableLoads](LoadInst *LI) {
|
||||
return UnsplittableLoads.count(LI);
|
||||
}),
|
||||
Loads.erase(llvm::remove_if(Loads,
|
||||
[&UnsplittableLoads](LoadInst *LI) {
|
||||
return UnsplittableLoads.count(LI);
|
||||
}),
|
||||
Loads.end());
|
||||
|
||||
// If no loads or stores are left, there is no pre-splitting to be done for
|
||||
@ -3804,7 +3861,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
|
||||
}
|
||||
|
||||
// Remove the killed slices that have been pre-split.
|
||||
AS.erase(remove_if(AS, [](const Slice &S) { return S.isDead(); }), AS.end());
|
||||
AS.erase(llvm::remove_if(AS, [](const Slice &S) { return S.isDead(); }),
|
||||
AS.end());
|
||||
|
||||
// Insert our new slices. This will sort and merge them into the sorted
|
||||
// sequence.
|
||||
@ -3819,7 +3877,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
|
||||
// Finally, don't try to promote any allocas that now require re-splitting.
|
||||
// They have already been added to the worklist above.
|
||||
PromotableAllocas.erase(
|
||||
remove_if(
|
||||
llvm::remove_if(
|
||||
PromotableAllocas,
|
||||
[&](AllocaInst *AI) { return ResplitPromotableAllocas.count(AI); }),
|
||||
PromotableAllocas.end());
|
||||
@ -4256,7 +4314,7 @@ PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT,
|
||||
auto IsInSet = [&](AllocaInst *AI) { return DeletedAllocas.count(AI); };
|
||||
Worklist.remove_if(IsInSet);
|
||||
PostPromotionWorklist.remove_if(IsInSet);
|
||||
PromotableAllocas.erase(remove_if(PromotableAllocas, IsInSet),
|
||||
PromotableAllocas.erase(llvm::remove_if(PromotableAllocas, IsInSet),
|
||||
PromotableAllocas.end());
|
||||
DeletedAllocas.clear();
|
||||
}
|
||||
@ -4291,9 +4349,12 @@ class llvm::sroa::SROALegacyPass : public FunctionPass {
|
||||
SROA Impl;
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
|
||||
SROALegacyPass() : FunctionPass(ID) {
|
||||
initializeSROALegacyPassPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
bool runOnFunction(Function &F) override {
|
||||
if (skipFunction(F))
|
||||
return false;
|
||||
@ -4303,6 +4364,7 @@ public:
|
||||
getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F));
|
||||
return !PA.areAllPreserved();
|
||||
}
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.addRequired<AssumptionCacheTracker>();
|
||||
AU.addRequired<DominatorTreeWrapperPass>();
|
||||
@ -4311,7 +4373,6 @@ public:
|
||||
}
|
||||
|
||||
StringRef getPassName() const override { return "SROA"; }
|
||||
static char ID;
|
||||
};
|
||||
|
||||
char SROALegacyPass::ID = 0;
|
||||
|
@ -15,38 +15,86 @@
|
||||
// "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/DenseSet.h"
|
||||
#include "llvm/ADT/MapVector.h"
|
||||
#include "llvm/ADT/None.h"
|
||||
#include "llvm/ADT/Optional.h"
|
||||
#include "llvm/ADT/PostOrderIterator.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ADT/SetVector.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/ADT/SmallSet.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/ADT/iterator.h"
|
||||
#include "llvm/ADT/iterator_range.h"
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Analysis/CodeMetrics.h"
|
||||
#include "llvm/Analysis/DemandedBits.h"
|
||||
#include "llvm/Analysis/GlobalsModRef.h"
|
||||
#include "llvm/Analysis/LoopAccessAnalysis.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Analysis/MemoryLocation.h"
|
||||
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
|
||||
#include "llvm/Analysis/ScalarEvolution.h"
|
||||
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
|
||||
#include "llvm/Analysis/TargetLibraryInfo.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/Analysis/ValueTracking.h"
|
||||
#include "llvm/Analysis/VectorUtils.h"
|
||||
#include "llvm/IR/Attributes.h"
|
||||
#include "llvm/IR/BasicBlock.h"
|
||||
#include "llvm/IR/Constant.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
#include "llvm/IR/DebugLoc.h"
|
||||
#include "llvm/IR/DerivedTypes.h"
|
||||
#include "llvm/IR/Dominators.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/InstrTypes.h"
|
||||
#include "llvm/IR/Instruction.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/IntrinsicInst.h"
|
||||
#include "llvm/IR/Intrinsics.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/IR/NoFolder.h"
|
||||
#include "llvm/IR/Operator.h"
|
||||
#include "llvm/IR/PassManager.h"
|
||||
#include "llvm/IR/PatternMatch.h"
|
||||
#include "llvm/IR/Type.h"
|
||||
#include "llvm/IR/Use.h"
|
||||
#include "llvm/IR/User.h"
|
||||
#include "llvm/IR/Value.h"
|
||||
#include "llvm/IR/ValueHandle.h"
|
||||
#include "llvm/IR/Verifier.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Support/Casting.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Compiler.h"
|
||||
#include "llvm/Support/DOTGraphTraits.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/GraphWriter.h"
|
||||
#include "llvm/Support/KnownBits.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Transforms/Utils/LoopUtils.h"
|
||||
#include "llvm/Transforms/Vectorize.h"
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <iterator>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
using namespace llvm;
|
||||
using namespace llvm::PatternMatch;
|
||||
@ -382,7 +430,6 @@ static bool matchExtractIndex(Instruction *E, unsigned Idx, unsigned Opcode) {
|
||||
/// possible scalar operand in vectorized instruction.
|
||||
static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
|
||||
TargetLibraryInfo *TLI) {
|
||||
|
||||
unsigned Opcode = UserInst->getOpcode();
|
||||
switch (Opcode) {
|
||||
case Instruction::Load: {
|
||||
@ -427,24 +474,25 @@ static bool isSimple(Instruction *I) {
|
||||
}
|
||||
|
||||
namespace llvm {
|
||||
|
||||
namespace slpvectorizer {
|
||||
|
||||
/// Bottom Up SLP Vectorizer.
|
||||
class BoUpSLP {
|
||||
public:
|
||||
typedef SmallVector<Value *, 8> ValueList;
|
||||
typedef SmallVector<Instruction *, 16> InstrList;
|
||||
typedef SmallPtrSet<Value *, 16> ValueSet;
|
||||
typedef SmallVector<StoreInst *, 8> StoreList;
|
||||
typedef MapVector<Value *, SmallVector<Instruction *, 2>>
|
||||
ExtraValueToDebugLocsMap;
|
||||
using ValueList = SmallVector<Value *, 8>;
|
||||
using InstrList = SmallVector<Instruction *, 16>;
|
||||
using ValueSet = SmallPtrSet<Value *, 16>;
|
||||
using StoreList = SmallVector<StoreInst *, 8>;
|
||||
using ExtraValueToDebugLocsMap =
|
||||
MapVector<Value *, SmallVector<Instruction *, 2>>;
|
||||
|
||||
BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti,
|
||||
TargetLibraryInfo *TLi, AliasAnalysis *Aa, LoopInfo *Li,
|
||||
DominatorTree *Dt, AssumptionCache *AC, DemandedBits *DB,
|
||||
const DataLayout *DL, OptimizationRemarkEmitter *ORE)
|
||||
: NumLoadsWantToKeepOrder(0), NumLoadsWantToChangeOrder(0), F(Func),
|
||||
SE(Se), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt), AC(AC), DB(DB),
|
||||
DL(DL), ORE(ORE), Builder(Se->getContext()) {
|
||||
: F(Func), SE(Se), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt), AC(AC),
|
||||
DB(DB), DL(DL), ORE(ORE), Builder(Se->getContext()) {
|
||||
CodeMetrics::collectEphemeralValues(F, AC, EphValues);
|
||||
// Use the vector register size specified by the target unless overridden
|
||||
// by a command-line option.
|
||||
@ -466,6 +514,7 @@ public:
|
||||
/// \brief Vectorize the tree that starts with the elements in \p VL.
|
||||
/// Returns the vectorized root.
|
||||
Value *vectorizeTree();
|
||||
|
||||
/// Vectorize the tree but with the list of externally used values \p
|
||||
/// ExternallyUsedValues. Values in this MapVector can be replaced by the
|
||||
/// generated extractvalue instructions.
|
||||
@ -483,6 +532,7 @@ public:
|
||||
/// the purpose of scheduling and extraction in the \p UserIgnoreLst.
|
||||
void buildTree(ArrayRef<Value *> Roots,
|
||||
ArrayRef<Value *> UserIgnoreLst = None);
|
||||
|
||||
/// Construct a vectorizable tree that starts at \p Roots, ignoring users for
|
||||
/// the purpose of scheduling and extraction in the \p UserIgnoreLst taking
|
||||
/// into account (and updating it, if required) list of externally used
|
||||
@ -599,15 +649,14 @@ private:
|
||||
void reorderAltShuffleOperands(unsigned Opcode, ArrayRef<Value *> VL,
|
||||
SmallVectorImpl<Value *> &Left,
|
||||
SmallVectorImpl<Value *> &Right);
|
||||
|
||||
/// \brief Reorder commutative operands to get better probability of
|
||||
/// generating vectorized code.
|
||||
void reorderInputsAccordingToOpcode(unsigned Opcode, ArrayRef<Value *> VL,
|
||||
SmallVectorImpl<Value *> &Left,
|
||||
SmallVectorImpl<Value *> &Right);
|
||||
struct TreeEntry {
|
||||
TreeEntry(std::vector<TreeEntry> &Container)
|
||||
: Scalars(), VectorizedValue(nullptr), NeedToGather(0),
|
||||
Container(Container) {}
|
||||
TreeEntry(std::vector<TreeEntry> &Container) : Container(Container) {}
|
||||
|
||||
/// \returns true if the scalars in VL are equal to this entry.
|
||||
bool isSame(ArrayRef<Value *> VL) const {
|
||||
@ -619,10 +668,10 @@ private:
|
||||
ValueList Scalars;
|
||||
|
||||
/// The Scalars are vectorized into this value. It is initialized to Null.
|
||||
Value *VectorizedValue;
|
||||
Value *VectorizedValue = nullptr;
|
||||
|
||||
/// Do we need to gather this sequence ?
|
||||
bool NeedToGather;
|
||||
bool NeedToGather = false;
|
||||
|
||||
/// Points back to the VectorizableTree.
|
||||
///
|
||||
@ -686,16 +735,19 @@ private:
|
||||
|
||||
/// This POD struct describes one external user in the vectorized tree.
|
||||
struct ExternalUser {
|
||||
ExternalUser (Value *S, llvm::User *U, int L) :
|
||||
Scalar(S), User(U), Lane(L){}
|
||||
ExternalUser(Value *S, llvm::User *U, int L)
|
||||
: Scalar(S), User(U), Lane(L) {}
|
||||
|
||||
// Which scalar in our function.
|
||||
Value *Scalar;
|
||||
|
||||
// Which user that uses the scalar.
|
||||
llvm::User *User;
|
||||
|
||||
// Which lane does the scalar belong to.
|
||||
int Lane;
|
||||
};
|
||||
typedef SmallVector<ExternalUser, 16> UserList;
|
||||
using UserList = SmallVector<ExternalUser, 16>;
|
||||
|
||||
/// Checks if two instructions may access the same memory.
|
||||
///
|
||||
@ -703,7 +755,6 @@ private:
|
||||
/// is invariant in the calling loop.
|
||||
bool isAliased(const MemoryLocation &Loc1, Instruction *Inst1,
|
||||
Instruction *Inst2) {
|
||||
|
||||
// First check if the result is already in the cache.
|
||||
AliasCacheKey key = std::make_pair(Inst1, Inst2);
|
||||
Optional<bool> &result = AliasCache[key];
|
||||
@ -721,7 +772,7 @@ private:
|
||||
return aliased;
|
||||
}
|
||||
|
||||
typedef std::pair<Instruction *, Instruction *> AliasCacheKey;
|
||||
using AliasCacheKey = std::pair<Instruction *, Instruction *>;
|
||||
|
||||
/// Cache for alias results.
|
||||
/// TODO: consider moving this to the AliasAnalysis itself.
|
||||
@ -754,6 +805,7 @@ private:
|
||||
|
||||
/// Holds all of the instructions that we gathered.
|
||||
SetVector<Instruction *> GatherSeq;
|
||||
|
||||
/// A list of blocks that we are going to CSE.
|
||||
SetVector<BasicBlock *> CSEBlocks;
|
||||
|
||||
@ -762,17 +814,11 @@ private:
|
||||
/// instruction bundle (= a group of instructions which is combined into a
|
||||
/// vector instruction).
|
||||
struct ScheduleData {
|
||||
|
||||
// The initial value for the dependency counters. It means that the
|
||||
// dependencies are not calculated yet.
|
||||
enum { InvalidDeps = -1 };
|
||||
|
||||
ScheduleData()
|
||||
: Inst(nullptr), FirstInBundle(nullptr), NextInBundle(nullptr),
|
||||
NextLoadStore(nullptr), SchedulingRegionID(0), SchedulingPriority(0),
|
||||
Dependencies(InvalidDeps), UnscheduledDeps(InvalidDeps),
|
||||
UnscheduledDepsInBundle(InvalidDeps), IsScheduled(false),
|
||||
OpValue(nullptr) {}
|
||||
ScheduleData() = default;
|
||||
|
||||
void init(int BlockSchedulingRegionID, Value *OpVal) {
|
||||
FirstInBundle = this;
|
||||
@ -842,19 +888,19 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
Instruction *Inst;
|
||||
Instruction *Inst = nullptr;
|
||||
|
||||
/// Points to the head in an instruction bundle (and always to this for
|
||||
/// single instructions).
|
||||
ScheduleData *FirstInBundle;
|
||||
ScheduleData *FirstInBundle = nullptr;
|
||||
|
||||
/// Single linked list of all instructions in a bundle. Null if it is a
|
||||
/// single instruction.
|
||||
ScheduleData *NextInBundle;
|
||||
ScheduleData *NextInBundle = nullptr;
|
||||
|
||||
/// Single linked list of all memory instructions (e.g. load, store, call)
|
||||
/// in the block - until the end of the scheduling region.
|
||||
ScheduleData *NextLoadStore;
|
||||
ScheduleData *NextLoadStore = nullptr;
|
||||
|
||||
/// The dependent memory instructions.
|
||||
/// This list is derived on demand in calculateDependencies().
|
||||
@ -862,34 +908,33 @@ private:
|
||||
|
||||
/// This ScheduleData is in the current scheduling region if this matches
|
||||
/// the current SchedulingRegionID of BlockScheduling.
|
||||
int SchedulingRegionID;
|
||||
int SchedulingRegionID = 0;
|
||||
|
||||
/// Used for getting a "good" final ordering of instructions.
|
||||
int SchedulingPriority;
|
||||
int SchedulingPriority = 0;
|
||||
|
||||
/// The number of dependencies. Consists of the number of users of the
|
||||
/// instruction plus the number of dependent memory instructions (if any).
|
||||
/// This value is calculated on demand.
|
||||
/// If InvalidDeps, the number of dependencies is not calculated yet.
|
||||
///
|
||||
int Dependencies;
|
||||
int Dependencies = InvalidDeps;
|
||||
|
||||
/// The number of dependencies minus the number of dependencies of scheduled
|
||||
/// instructions. As soon as this is zero, the instruction/bundle gets ready
|
||||
/// for scheduling.
|
||||
/// Note that this is negative as long as Dependencies is not calculated.
|
||||
int UnscheduledDeps;
|
||||
int UnscheduledDeps = InvalidDeps;
|
||||
|
||||
/// The sum of UnscheduledDeps in a bundle. Equals to UnscheduledDeps for
|
||||
/// single instructions.
|
||||
int UnscheduledDepsInBundle;
|
||||
int UnscheduledDepsInBundle = InvalidDeps;
|
||||
|
||||
/// True if this instruction is scheduled (or considered as scheduled in the
|
||||
/// dry-run).
|
||||
bool IsScheduled;
|
||||
bool IsScheduled = false;
|
||||
|
||||
/// Opcode of the current instruction in the schedule data.
|
||||
Value *OpValue;
|
||||
Value *OpValue = nullptr;
|
||||
};
|
||||
|
||||
#ifndef NDEBUG
|
||||
@ -903,18 +948,9 @@ private:
|
||||
friend struct DOTGraphTraits<BoUpSLP *>;
|
||||
|
||||
/// Contains all scheduling data for a basic block.
|
||||
///
|
||||
struct BlockScheduling {
|
||||
|
||||
BlockScheduling(BasicBlock *BB)
|
||||
: BB(BB), ChunkSize(BB->size()), ChunkPos(ChunkSize),
|
||||
ScheduleStart(nullptr), ScheduleEnd(nullptr),
|
||||
FirstLoadStoreInRegion(nullptr), LastLoadStoreInRegion(nullptr),
|
||||
ScheduleRegionSize(0),
|
||||
ScheduleRegionSizeLimit(ScheduleRegionSizeBudget),
|
||||
// Make sure that the initial SchedulingRegionID is greater than the
|
||||
// initial SchedulingRegionID in ScheduleData (which is 0).
|
||||
SchedulingRegionID(1) {}
|
||||
: BB(BB), ChunkSize(BB->size()), ChunkPos(ChunkSize) {}
|
||||
|
||||
void clear() {
|
||||
ReadyInsts.clear();
|
||||
@ -1090,28 +1126,30 @@ private:
|
||||
ReadyList ReadyInsts;
|
||||
|
||||
/// The first instruction of the scheduling region.
|
||||
Instruction *ScheduleStart;
|
||||
Instruction *ScheduleStart = nullptr;
|
||||
|
||||
/// The first instruction _after_ the scheduling region.
|
||||
Instruction *ScheduleEnd;
|
||||
Instruction *ScheduleEnd = nullptr;
|
||||
|
||||
/// The first memory accessing instruction in the scheduling region
|
||||
/// (can be null).
|
||||
ScheduleData *FirstLoadStoreInRegion;
|
||||
ScheduleData *FirstLoadStoreInRegion = nullptr;
|
||||
|
||||
/// The last memory accessing instruction in the scheduling region
|
||||
/// (can be null).
|
||||
ScheduleData *LastLoadStoreInRegion;
|
||||
ScheduleData *LastLoadStoreInRegion = nullptr;
|
||||
|
||||
/// The current size of the scheduling region.
|
||||
int ScheduleRegionSize;
|
||||
int ScheduleRegionSize = 0;
|
||||
|
||||
/// The maximum size allowed for the scheduling region.
|
||||
int ScheduleRegionSizeLimit;
|
||||
int ScheduleRegionSizeLimit = ScheduleRegionSizeBudget;
|
||||
|
||||
/// The ID of the scheduling region. For a new vectorization iteration this
|
||||
/// is incremented which "removes" all ScheduleData from the region.
|
||||
int SchedulingRegionID;
|
||||
int SchedulingRegionID = 1;
|
||||
// Make sure that the initial SchedulingRegionID is greater than the
|
||||
// initial SchedulingRegionID in ScheduleData (which is 0).
|
||||
};
|
||||
|
||||
/// Attaches the BlockScheduling structures to basic blocks.
|
||||
@ -1125,10 +1163,10 @@ private:
|
||||
ArrayRef<Value *> UserIgnoreList;
|
||||
|
||||
// Number of load bundles that contain consecutive loads.
|
||||
int NumLoadsWantToKeepOrder;
|
||||
int NumLoadsWantToKeepOrder = 0;
|
||||
|
||||
// Number of load bundles that contain consecutive loads in reversed order.
|
||||
int NumLoadsWantToChangeOrder;
|
||||
int NumLoadsWantToChangeOrder = 0;
|
||||
|
||||
// Analysis and block reference.
|
||||
Function *F;
|
||||
@ -1155,20 +1193,20 @@ private:
|
||||
/// original width.
|
||||
MapVector<Value *, std::pair<uint64_t, bool>> MinBWs;
|
||||
};
|
||||
|
||||
} // end namespace slpvectorizer
|
||||
|
||||
template <> struct GraphTraits<BoUpSLP *> {
|
||||
typedef BoUpSLP::TreeEntry TreeEntry;
|
||||
using TreeEntry = BoUpSLP::TreeEntry;
|
||||
|
||||
/// NodeRef has to be a pointer per the GraphWriter.
|
||||
typedef TreeEntry *NodeRef;
|
||||
using NodeRef = TreeEntry *;
|
||||
|
||||
/// \brief Add the VectorizableTree to the index iterator to be able to return
|
||||
/// TreeEntry pointers.
|
||||
struct ChildIteratorType
|
||||
: public iterator_adaptor_base<ChildIteratorType,
|
||||
SmallVector<int, 1>::iterator> {
|
||||
|
||||
std::vector<TreeEntry> &VectorizableTree;
|
||||
|
||||
ChildIteratorType(SmallVector<int, 1>::iterator W,
|
||||
@ -1183,17 +1221,19 @@ template <> struct GraphTraits<BoUpSLP *> {
|
||||
static ChildIteratorType child_begin(NodeRef N) {
|
||||
return {N->UserTreeIndices.begin(), N->Container};
|
||||
}
|
||||
|
||||
static ChildIteratorType child_end(NodeRef N) {
|
||||
return {N->UserTreeIndices.end(), N->Container};
|
||||
}
|
||||
|
||||
/// For the node iterator we just need to turn the TreeEntry iterator into a
|
||||
/// TreeEntry* iterator so that it dereferences to NodeRef.
|
||||
typedef pointer_iterator<std::vector<TreeEntry>::iterator> nodes_iterator;
|
||||
using nodes_iterator = pointer_iterator<std::vector<TreeEntry>::iterator>;
|
||||
|
||||
static nodes_iterator nodes_begin(BoUpSLP *R) {
|
||||
return nodes_iterator(R->VectorizableTree.begin());
|
||||
}
|
||||
|
||||
static nodes_iterator nodes_end(BoUpSLP *R) {
|
||||
return nodes_iterator(R->VectorizableTree.end());
|
||||
}
|
||||
@ -1202,7 +1242,7 @@ template <> struct GraphTraits<BoUpSLP *> {
|
||||
};
|
||||
|
||||
template <> struct DOTGraphTraits<BoUpSLP *> : public DefaultDOTGraphTraits {
|
||||
typedef BoUpSLP::TreeEntry TreeEntry;
|
||||
using TreeEntry = BoUpSLP::TreeEntry;
|
||||
|
||||
DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
|
||||
|
||||
@ -1239,6 +1279,7 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
|
||||
ExtraValueToDebugLocsMap ExternallyUsedValues;
|
||||
buildTree(Roots, ExternallyUsedValues, UserIgnoreLst);
|
||||
}
|
||||
|
||||
void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
|
||||
ExtraValueToDebugLocsMap &ExternallyUsedValues,
|
||||
ArrayRef<Value *> UserIgnoreLst) {
|
||||
@ -1627,7 +1668,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
case Instruction::AShr:
|
||||
case Instruction::And:
|
||||
case Instruction::Or:
|
||||
case Instruction::Xor: {
|
||||
case Instruction::Xor:
|
||||
newTreeEntry(VL, true, UserTreeIdx);
|
||||
DEBUG(dbgs() << "SLP: added a vector of bin op.\n");
|
||||
|
||||
@ -1650,7 +1691,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
buildTree_rec(Operands, Depth + 1, UserTreeIdx);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
case Instruction::GetElementPtr: {
|
||||
// We don't combine GEPs with complicated (nested) indexing.
|
||||
for (unsigned j = 0; j < VL.size(); ++j) {
|
||||
@ -1784,7 +1825,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
}
|
||||
return;
|
||||
}
|
||||
case Instruction::ShuffleVector: {
|
||||
case Instruction::ShuffleVector:
|
||||
// If this is not an alternate sequence of opcode like add-sub
|
||||
// then do not vectorize this instruction.
|
||||
if (!isAltShuffle) {
|
||||
@ -1814,7 +1855,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
buildTree_rec(Operands, Depth + 1, UserTreeIdx);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
default:
|
||||
BS.cancelScheduling(VL, VL0);
|
||||
newTreeEntry(VL, false, UserTreeIdx);
|
||||
@ -1942,11 +1983,11 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
||||
assert(Opcode && allSameType(VL) && allSameBlock(VL) && "Invalid VL");
|
||||
Instruction *VL0 = cast<Instruction>(VL[0]);
|
||||
switch (Opcode) {
|
||||
case Instruction::PHI: {
|
||||
case Instruction::PHI:
|
||||
return 0;
|
||||
}
|
||||
|
||||
case Instruction::ExtractValue:
|
||||
case Instruction::ExtractElement: {
|
||||
case Instruction::ExtractElement:
|
||||
if (canReuseExtract(VL, VL0)) {
|
||||
int DeadCost = 0;
|
||||
for (unsigned i = 0, e = VL.size(); i < e; ++i) {
|
||||
@ -1962,7 +2003,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
||||
return -DeadCost;
|
||||
}
|
||||
return getGatherCost(VecTy);
|
||||
}
|
||||
|
||||
case Instruction::ZExt:
|
||||
case Instruction::SExt:
|
||||
case Instruction::FPToUI:
|
||||
@ -2173,7 +2214,6 @@ bool BoUpSLP::isFullyVectorizableTinyTree() {
|
||||
}
|
||||
|
||||
bool BoUpSLP::isTreeTinyAndNotFullyVectorizable() {
|
||||
|
||||
// We can vectorize the tree if its size is greater than or equal to the
|
||||
// minimum size specified by the MinTreeSize command line option.
|
||||
if (VectorizableTree.size() >= MinTreeSize)
|
||||
@ -2465,8 +2505,7 @@ void BoUpSLP::reorderInputsAccordingToOpcode(unsigned Opcode,
|
||||
ArrayRef<Value *> VL,
|
||||
SmallVectorImpl<Value *> &Left,
|
||||
SmallVectorImpl<Value *> &Right) {
|
||||
|
||||
if (VL.size()) {
|
||||
if (!VL.empty()) {
|
||||
// Peel the first iteration out of the loop since there's nothing
|
||||
// interesting to do anyway and it simplifies the checks in the loop.
|
||||
auto *I = cast<Instruction>(VL[0]);
|
||||
@ -2556,14 +2595,13 @@ void BoUpSLP::reorderInputsAccordingToOpcode(unsigned Opcode,
|
||||
}
|
||||
|
||||
void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL, Value *OpValue) {
|
||||
|
||||
// Get the basic block this bundle is in. All instructions in the bundle
|
||||
// should be in this block.
|
||||
auto *Front = cast<Instruction>(OpValue);
|
||||
auto *BB = Front->getParent();
|
||||
const unsigned Opcode = cast<Instruction>(OpValue)->getOpcode();
|
||||
const unsigned AltOpcode = getAltOpcode(Opcode);
|
||||
assert(all_of(make_range(VL.begin(), VL.end()), [=](Value *V) -> bool {
|
||||
assert(llvm::all_of(make_range(VL.begin(), VL.end()), [=](Value *V) -> bool {
|
||||
return !sameOpcodeOrAlt(Opcode, AltOpcode,
|
||||
cast<Instruction>(V)->getOpcode()) ||
|
||||
cast<Instruction>(V)->getParent() == BB;
|
||||
@ -3082,7 +3120,6 @@ Value *BoUpSLP::vectorizeTree() {
|
||||
|
||||
Value *
|
||||
BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
|
||||
|
||||
// All blocks must be scheduled before any instructions are inserted.
|
||||
for (auto &BSIter : BlocksSchedules) {
|
||||
scheduleBlock(BSIter.second.get());
|
||||
@ -3482,7 +3519,7 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
|
||||
BasicBlock::reverse_iterator UpperEnd = BB->rend();
|
||||
BasicBlock::iterator DownIter = ScheduleEnd->getIterator();
|
||||
BasicBlock::iterator LowerEnd = BB->end();
|
||||
for (;;) {
|
||||
while (true) {
|
||||
if (++ScheduleRegionSize > ScheduleRegionSizeLimit) {
|
||||
DEBUG(dbgs() << "SLP: exceeded schedule region size limit\n");
|
||||
return false;
|
||||
@ -3696,7 +3733,6 @@ void BoUpSLP::BlockScheduling::resetSchedule() {
|
||||
}
|
||||
|
||||
void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
|
||||
|
||||
if (!BS->ScheduleStart)
|
||||
return;
|
||||
|
||||
@ -3828,7 +3864,6 @@ unsigned BoUpSLP::getVectorElementSize(Value *V) {
|
||||
static bool collectValuesToDemote(Value *V, SmallPtrSetImpl<Value *> &Expr,
|
||||
SmallVectorImpl<Value *> &ToDemote,
|
||||
SmallVectorImpl<Value *> &Roots) {
|
||||
|
||||
// We can always demote constants.
|
||||
if (isa<Constant>(V)) {
|
||||
ToDemote.push_back(V);
|
||||
@ -3971,7 +4006,7 @@ void BoUpSLP::computeMinimumValueSizes() {
|
||||
|
||||
// Determine if the sign bit of all the roots is known to be zero. If not,
|
||||
// IsKnownPositive is set to False.
|
||||
IsKnownPositive = all_of(TreeRoot, [&](Value *R) {
|
||||
IsKnownPositive = llvm::all_of(TreeRoot, [&](Value *R) {
|
||||
KnownBits Known = computeKnownBits(R, *DL);
|
||||
return Known.isNonNegative();
|
||||
});
|
||||
@ -3979,7 +4014,7 @@ void BoUpSLP::computeMinimumValueSizes() {
|
||||
// Determine the maximum number of bits required to store the scalar
|
||||
// values.
|
||||
for (auto *Scalar : ToDemote) {
|
||||
auto NumSignBits = ComputeNumSignBits(Scalar, *DL, 0, AC, 0, DT);
|
||||
auto NumSignBits = ComputeNumSignBits(Scalar, *DL, 0, AC, nullptr, DT);
|
||||
auto NumTypeBits = DL->getTypeSizeInBits(Scalar->getType());
|
||||
MaxBitWidth = std::max<unsigned>(NumTypeBits - NumSignBits, MaxBitWidth);
|
||||
}
|
||||
@ -4024,6 +4059,7 @@ void BoUpSLP::computeMinimumValueSizes() {
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
/// The SLPVectorizer Pass.
|
||||
struct SLPVectorizer : public FunctionPass {
|
||||
SLPVectorizerPass Impl;
|
||||
@ -4035,7 +4071,6 @@ struct SLPVectorizer : public FunctionPass {
|
||||
initializeSLPVectorizerPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
|
||||
bool doInitialization(Module &M) override {
|
||||
return false;
|
||||
}
|
||||
@ -4075,6 +4110,7 @@ struct SLPVectorizer : public FunctionPass {
|
||||
AU.setPreservesCFG();
|
||||
}
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
PreservedAnalyses SLPVectorizerPass::run(Function &F, FunctionAnalysisManager &AM) {
|
||||
@ -4221,7 +4257,9 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
|
||||
DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");
|
||||
if (Cost < -SLPCostThreshold) {
|
||||
DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
|
||||
|
||||
using namespace ore;
|
||||
|
||||
R.getORE()->emit(OptimizationRemark(SV_NAME, "StoresVectorized",
|
||||
cast<StoreInst>(Chain[i]))
|
||||
<< "Stores SLP vectorized with cost " << NV("Cost", Cost)
|
||||
@ -4310,7 +4348,6 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
|
||||
}
|
||||
|
||||
void SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) {
|
||||
|
||||
// Initialize the collections. We will make a single pass over the block.
|
||||
Stores.clear();
|
||||
GEPs.clear();
|
||||
@ -4319,7 +4356,6 @@ void SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) {
|
||||
// Stores and GEPs according to the underlying objects of their pointer
|
||||
// operands.
|
||||
for (Instruction &I : *BB) {
|
||||
|
||||
// Ignore store instructions that are volatile or have a pointer operand
|
||||
// that doesn't point to a scalar type.
|
||||
if (auto *SI = dyn_cast<StoreInst>(&I)) {
|
||||
@ -4557,6 +4593,7 @@ static Value *createRdxShuffleMask(unsigned VecLen, unsigned NumEltsToRdx,
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
/// Model horizontal reductions.
|
||||
///
|
||||
/// A horizontal reduction is a tree of reduction operations (currently add and
|
||||
@ -4594,10 +4631,13 @@ class HorizontalReduction {
|
||||
struct OperationData {
|
||||
/// true if the operation is a reduced value, false if reduction operation.
|
||||
bool IsReducedValue = false;
|
||||
|
||||
/// Opcode of the instruction.
|
||||
unsigned Opcode = 0;
|
||||
|
||||
/// Left operand of the reduction operation.
|
||||
Value *LHS = nullptr;
|
||||
|
||||
/// Right operand of the reduction operation.
|
||||
Value *RHS = nullptr;
|
||||
|
||||
@ -4610,40 +4650,48 @@ class HorizontalReduction {
|
||||
|
||||
public:
|
||||
explicit OperationData() = default;
|
||||
|
||||
/// Constructor for reduced values. They are identified by opcode only and
|
||||
/// don't have associated LHS/RHS values.
|
||||
explicit OperationData(Value *V) : IsReducedValue(true) {
|
||||
if (auto *I = dyn_cast<Instruction>(V))
|
||||
Opcode = I->getOpcode();
|
||||
}
|
||||
|
||||
/// Constructor for binary reduction operations with opcode and its left and
|
||||
/// right operands.
|
||||
OperationData(unsigned Opcode, Value *LHS, Value *RHS)
|
||||
: IsReducedValue(false), Opcode(Opcode), LHS(LHS), RHS(RHS) {}
|
||||
: Opcode(Opcode), LHS(LHS), RHS(RHS) {}
|
||||
|
||||
explicit operator bool() const { return Opcode; }
|
||||
|
||||
/// Get the index of the first operand.
|
||||
unsigned getFirstOperandIndex() const {
|
||||
assert(!!*this && "The opcode is not set.");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// Total number of operands in the reduction operation.
|
||||
unsigned getNumberOfOperands() const {
|
||||
assert(!IsReducedValue && !!*this && LHS && RHS &&
|
||||
"Expected reduction operation.");
|
||||
return 2;
|
||||
}
|
||||
|
||||
/// Expected number of uses for reduction operations/reduced values.
|
||||
unsigned getRequiredNumberOfUses() const {
|
||||
assert(!IsReducedValue && !!*this && LHS && RHS &&
|
||||
"Expected reduction operation.");
|
||||
return 1;
|
||||
}
|
||||
|
||||
/// Checks if instruction is associative and can be vectorized.
|
||||
bool isAssociative(Instruction *I) const {
|
||||
assert(!IsReducedValue && *this && LHS && RHS &&
|
||||
"Expected reduction operation.");
|
||||
return I->isAssociative();
|
||||
}
|
||||
|
||||
/// Checks if the reduction operation can be vectorized.
|
||||
bool isVectorizable(Instruction *I) const {
|
||||
return isVectorizable() && isAssociative(I);
|
||||
@ -4665,13 +4713,16 @@ class HorizontalReduction {
|
||||
LHS = nullptr;
|
||||
RHS = nullptr;
|
||||
}
|
||||
|
||||
/// Get the opcode of the reduction operation.
|
||||
unsigned getOpcode() const {
|
||||
assert(isVectorizable() && "Expected vectorizable operation.");
|
||||
return Opcode;
|
||||
}
|
||||
|
||||
Value *getLHS() const { return LHS; }
|
||||
Value *getRHS() const { return RHS; }
|
||||
|
||||
/// Creates reduction operation with the current opcode.
|
||||
Value *createOp(IRBuilder<> &Builder, const Twine &Name = "") const {
|
||||
assert(!IsReducedValue &&
|
||||
@ -4686,8 +4737,10 @@ class HorizontalReduction {
|
||||
|
||||
/// The operation data of the reduction operation.
|
||||
OperationData ReductionData;
|
||||
|
||||
/// The operation data of the values we perform a reduction on.
|
||||
OperationData ReducedValueData;
|
||||
|
||||
/// Should we model this reduction as a pairwise reduction tree or a tree that
|
||||
/// splits the vector in halves and adds those halves.
|
||||
bool IsPairwiseReduction = false;
|
||||
@ -5018,6 +5071,7 @@ private:
|
||||
return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
|
||||
}
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
/// \brief Recognize construction of vectors like
|
||||
@ -5425,7 +5479,6 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
|
||||
bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {
|
||||
auto Changed = false;
|
||||
for (auto &Entry : GEPs) {
|
||||
|
||||
// If the getelementptr list has fewer than two elements, there's nothing
|
||||
// to do.
|
||||
if (Entry.second.size() < 2)
|
||||
@ -5530,7 +5583,9 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
|
||||
}
|
||||
|
||||
char SLPVectorizer::ID = 0;
|
||||
|
||||
static const char lv_name[] = "SLP Vectorizer";
|
||||
|
||||
INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false)
|
||||
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
|
||||
@ -5541,6 +5596,4 @@ INITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
|
||||
INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false)
|
||||
|
||||
namespace llvm {
|
||||
Pass *createSLPVectorizerPass() { return new SLPVectorizer(); }
|
||||
}
|
||||
Pass *llvm::createSLPVectorizerPass() { return new SLPVectorizer(); }
|
||||
|
Loading…
x
Reference in New Issue
Block a user