mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
5b57ac22c4
Summary: Avoid re-examining operands on recursive walk looking for CTR. This was causing huge compile time after some earlier optimization created a large expression. The start of the expression (created by IndVarSimplify) looked like: %469 = lshr i64 trunc (i128 xor (i128 udiv (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 ptrtoint (i8 @_ZN4absl13hash_internal13CityHashState5kSeedE to i64), i64 120) to i128), i128 8192506886679785011), i128 64), i128 mul (i128 zext (i64 add (i64 ptrtoint (i8 @_ZN4absl13hash_internal13CityHashState5kSeedE to i64), i64 120) to i128), i128 8192506886679785011)) to i64), i64 45) to i128), i128 8192506886679785011), i128 64), i128 mul (i128 zext (i64 add (i64 trunc (i128 xor (i128 lshr (i128 mul (i128 zext (i64 add (i64 ptrtoint (i8 @_ZN4absl13hash_internal13CityHashState5kSeedE to i64), i64 120) to i128), i128 8192506886679785011), i128 64), i128 mul (i128 zext (i64 add (i64 ptrtoint (i8 @_ZN4absl13hash_internal13CityHashState5kSeedE to i64), i64 120) to i128), i128 8192506886679785011)) to i64), i64 45) to i128), ... with the _ZN4absl13hash_internal13CityHashState5kSeedE referenced many times. Reviewers: hfinkel Subscribers: nemanjai, hiraditya, kbarton, jsji, shchenz, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D75790
128 lines
5.5 KiB
C++
128 lines
5.5 KiB
C++
//===-- PPCTargetTransformInfo.h - PPC specific TTI -------------*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
/// \file
|
|
/// This file a TargetTransformInfo::Concept conforming object specific to the
|
|
/// PPC target machine. It uses the target's detailed information to
|
|
/// provide more precise answers to certain TTI queries, while letting the
|
|
/// target independent and default TTI implementations handle the rest.
|
|
///
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef LLVM_LIB_TARGET_POWERPC_PPCTARGETTRANSFORMINFO_H
|
|
#define LLVM_LIB_TARGET_POWERPC_PPCTARGETTRANSFORMINFO_H
|
|
|
|
#include "PPCTargetMachine.h"
|
|
#include "llvm/Analysis/TargetTransformInfo.h"
|
|
#include "llvm/CodeGen/BasicTTIImpl.h"
|
|
#include "llvm/CodeGen/TargetLowering.h"
|
|
|
|
namespace llvm {
|
|
|
|
class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
|
|
typedef BasicTTIImplBase<PPCTTIImpl> BaseT;
|
|
typedef TargetTransformInfo TTI;
|
|
friend BaseT;
|
|
|
|
const PPCSubtarget *ST;
|
|
const PPCTargetLowering *TLI;
|
|
|
|
const PPCSubtarget *getST() const { return ST; }
|
|
const PPCTargetLowering *getTLI() const { return TLI; }
|
|
bool mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
|
|
SmallPtrSetImpl<const Value *> &Visited);
|
|
|
|
public:
|
|
explicit PPCTTIImpl(const PPCTargetMachine *TM, const Function &F)
|
|
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
|
|
TLI(ST->getTargetLowering()) {}
|
|
|
|
/// \name Scalar TTI Implementations
|
|
/// @{
|
|
|
|
using BaseT::getIntImmCost;
|
|
int getIntImmCost(const APInt &Imm, Type *Ty);
|
|
|
|
int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
|
|
Type *Ty);
|
|
int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
|
|
Type *Ty);
|
|
|
|
unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands);
|
|
|
|
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
|
|
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
|
|
AssumptionCache &AC,
|
|
TargetLibraryInfo *LibInfo,
|
|
HardwareLoopInfo &HWLoopInfo);
|
|
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
|
|
DominatorTree *DT, AssumptionCache *AC,
|
|
TargetLibraryInfo *LibInfo);
|
|
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
|
TTI::UnrollingPreferences &UP);
|
|
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
|
|
TargetTransformInfo::LSRCost &C2);
|
|
|
|
/// @}
|
|
|
|
/// \name Vector TTI Implementations
|
|
/// @{
|
|
bool useColdCCForColdCall(Function &F);
|
|
bool enableAggressiveInterleaving(bool LoopHasReductions);
|
|
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
|
|
bool IsZeroCmp) const;
|
|
bool enableInterleavedAccessVectorization();
|
|
|
|
enum PPCRegisterClass {
|
|
GPRRC, FPRRC, VRRC, VSXRC
|
|
};
|
|
unsigned getNumberOfRegisters(unsigned ClassID) const;
|
|
unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
|
|
const char* getRegisterClassName(unsigned ClassID) const;
|
|
unsigned getRegisterBitWidth(bool Vector) const;
|
|
unsigned getCacheLineSize() const override;
|
|
unsigned getPrefetchDistance() const override;
|
|
unsigned getMaxInterleaveFactor(unsigned VF);
|
|
int vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1, Type *Ty2);
|
|
int getArithmeticInstrCost(
|
|
unsigned Opcode, Type *Ty,
|
|
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
|
|
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
|
|
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
|
|
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
|
|
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
|
|
const Instruction *CxtI = nullptr);
|
|
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
|
|
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
|
|
const Instruction *I = nullptr);
|
|
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
|
|
const Instruction *I = nullptr);
|
|
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
|
|
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
|
|
unsigned AddressSpace, const Instruction *I = nullptr);
|
|
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
|
|
unsigned Factor,
|
|
ArrayRef<unsigned> Indices,
|
|
unsigned Alignment,
|
|
unsigned AddressSpace,
|
|
bool UseMaskForCond = false,
|
|
bool UseMaskForGaps = false);
|
|
unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
|
|
ArrayRef<Value *> Args, FastMathFlags FMF,
|
|
unsigned VF, const Instruction *I = nullptr);
|
|
unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
|
|
ArrayRef<Type *> Tys, FastMathFlags FMF,
|
|
unsigned ScalarizationCostPassed = UINT_MAX,
|
|
const Instruction *I = nullptr);
|
|
|
|
/// @}
|
|
};
|
|
|
|
} // end namespace llvm
|
|
|
|
#endif
|