mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
fd61052e59
This patch removes the IsPairwiseForm flag from the Reduction Cost TTI hooks, along with some accompanying code for pattern matching reductions from trees starting at extract elements. IsPairwiseForm is now assumed to be false, which was the predominant way that the value was used from both the Loop and SLP vectorizers. Since adjustments such as D93860, the SLP vectorizer has not relied upon this distinction between pairwise and non-pairwise reductions. This also removes some code that was detecting reduction trees starting from extract elements inside the cost model. This case was double-counting costs though, adding the individual costs of the individual instructions _and_ the total cost of the reduction. Removing it changes the costs in llvm/test/Analysis/CostModel/X86/reduction.ll to not double count. The cost of reduction intrinsics is still tested through the various tests in llvm/test/Analysis/CostModel/X86/reduce-xyz.ll. Differential Revision: https://reviews.llvm.org/D105484
263 lines
11 KiB
C++
263 lines
11 KiB
C++
//===-- X86TargetTransformInfo.h - X86 specific TTI -------------*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
/// \file
|
|
/// This file defines a TargetTransformInfo::Concept conforming object specific to the
|
|
/// X86 target machine. It uses the target's detailed information to
|
|
/// provide more precise answers to certain TTI queries, while letting the
|
|
/// target independent and default TTI implementations handle the rest.
|
|
///
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef LLVM_LIB_TARGET_X86_X86TARGETTRANSFORMINFO_H
|
|
#define LLVM_LIB_TARGET_X86_X86TARGETTRANSFORMINFO_H
|
|
|
|
#include "X86TargetMachine.h"
|
|
#include "llvm/Analysis/TargetTransformInfo.h"
|
|
#include "llvm/CodeGen/BasicTTIImpl.h"
|
|
|
|
namespace llvm {
|
|
|
|
class InstCombiner;
|
|
|
|
/// X86 implementation of the TTI hooks, layered on the BasicTTIImplBase CRTP
/// base. Apart from the two trivial accessors and the constructor, every
/// member below is a declaration only; the definitions live outside this
/// header.
class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
  using BaseT = BasicTTIImplBase<X86TTIImpl>;
  using TTI = TargetTransformInfo;
  // NOTE(review): presumably lets the CRTP base reach the private
  // getST()/getTLI() accessors below -- confirm against BasicTTIImplBase.
  friend BaseT;

  // Both pointers are set once in the constructor. ST must stay declared
  // before TLI because TLI's initializer dereferences ST.
  const X86Subtarget *ST;
  const X86TargetLowering *TLI;

  /// Returns the subtarget captured at construction time.
  const X86Subtarget *getST() const { return ST; }
  /// Returns the target lowering captured at construction time.
  const X86TargetLowering *getTLI() const { return TLI; }

  /// Subtarget features, grouped below by the reason each one is listed.
  /// NOTE(review): the name suggests these are the features ignored when
  /// caller and callee feature sets are compared for inlining (see
  /// areInlineCompatible); the use site is not in this header -- confirm there.
  const FeatureBitset InlineFeatureIgnoreList = {
      // This indicates the CPU is 64-bit capable, not that we are in 64-bit
      // mode.
      X86::Feature64Bit,

      // These features don't have any intrinsics or ABI effect.
      X86::FeatureNOPL,
      X86::FeatureCMPXCHG16B,
      X86::FeatureLAHFSAHF,

      // Codegen control options.
      X86::FeatureFast11ByteNOP,
      X86::FeatureFast15ByteNOP,
      X86::FeatureFastBEXTR,
      X86::FeatureFastHorizontalOps,
      X86::FeatureFastLZCNT,
      X86::FeatureFastScalarFSQRT,
      X86::FeatureFastSHLDRotate,
      X86::FeatureFastScalarShiftMasks,
      X86::FeatureFastVectorShiftMasks,
      X86::FeatureFastVariableCrossLaneShuffle,
      X86::FeatureFastVariablePerLaneShuffle,
      X86::FeatureFastVectorFSQRT,
      X86::FeatureLEAForSP,
      X86::FeatureLEAUsesAG,
      X86::FeatureLZCNTFalseDeps,
      X86::FeatureBranchFusion,
      X86::FeatureMacroFusion,
      X86::FeaturePadShortFunctions,
      X86::FeaturePOPCNTFalseDeps,
      X86::FeatureSSEUnalignedMem,
      X86::FeatureSlow3OpsLEA,
      X86::FeatureSlowDivide32,
      X86::FeatureSlowDivide64,
      X86::FeatureSlowIncDec,
      X86::FeatureSlowLEA,
      X86::FeatureSlowPMADDWD,
      X86::FeatureSlowPMULLD,
      X86::FeatureSlowSHLD,
      X86::FeatureSlowTwoMemOps,
      X86::FeatureSlowUAMem16,
      X86::FeaturePreferMaskRegisters,
      X86::FeatureInsertVZEROUPPER,
      X86::FeatureUseGLMDivSqrtCosts,

      // Perf-tuning flags.
      X86::FeatureHasFastGather,
      X86::FeatureSlowUAMem32,

      // Based on whether user set the -mprefer-vector-width command line.
      X86::FeaturePrefer128Bit,
      X86::FeaturePrefer256Bit,

      // CPU name enums. These just follow CPU string.
      X86::ProcIntelAtom,
      X86::ProcIntelSLM,
  };

public:
  /// Binds this TTI to the subtarget selected for \p F: the base receives
  /// \p TM and the DataLayout of F's parent module, ST is taken from
  /// TM->getSubtargetImpl(F), and TLI from ST->getTargetLowering().
  explicit X86TTIImpl(const X86TargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  /// \name Scalar TTI Implementations
  /// @{
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);

  /// @}

  /// \name Cache TTI Implementation
  /// @{
  llvm::Optional<unsigned> getCacheSize(
      TargetTransformInfo::CacheLevel Level) const override;
  llvm::Optional<unsigned> getCacheAssociativity(
      TargetTransformInfo::CacheLevel Level) const override;
  /// @}

  /// \name Vector TTI Implementations
  /// @{

  unsigned getNumberOfRegisters(unsigned ClassID) const;
  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AS) const;
  unsigned getMaxInterleaveFactor(unsigned VF);
  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);
  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask, int Index,
                                 VectorType *SubTp);
  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I = nullptr);
  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I = nullptr);
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     unsigned Index);
  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract);
  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
                                  MaybeAlign Alignment, unsigned AddressSpace,
                                  TTI::TargetCostKind CostKind,
                                  const Instruction *I = nullptr);
  InstructionCost
  getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                        unsigned AddressSpace,
                        TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency);
  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I);
  InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
                                            const SCEV *Ptr);

  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) const;
  Optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) const;
  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;

  unsigned getAtomicMemIntrinsicMaxElementSize() const;

  InstructionCost
  getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                 TTI::TargetCostKind CostKind);
  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);

  InstructionCost getArithmeticReductionCost(
      unsigned Opcode, VectorType *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency);

  InstructionCost getMinMaxCost(Type *Ty, Type *CondTy, bool IsUnsigned);

  InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
                                         bool IsUnsigned,
                                         TTI::TargetCostKind CostKind);

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
      bool UseMaskForCond = false, bool UseMaskForGaps = false);
  InstructionCost getInterleavedMemoryOpCostAVX512(
      unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
      ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
      bool UseMaskForCond = false, bool UseMaskForGaps = false);
  InstructionCost getInterleavedMemoryOpCostAVX2(
      unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
      ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
      bool UseMaskForCond = false, bool UseMaskForGaps = false);

  InstructionCost getIntImmCost(int64_t);

  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind);

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);

  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr);
  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TTI::TargetCostKind CostKind);
  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                     TargetTransformInfo::LSRCost &C2);
  bool canMacroFuseCmp();
  bool isLegalMaskedLoad(Type *DataType, Align Alignment);
  bool isLegalMaskedStore(Type *DataType, Align Alignment);
  bool isLegalNTLoad(Type *DataType, Align Alignment);
  bool isLegalNTStore(Type *DataType, Align Alignment);
  bool isLegalMaskedGather(Type *DataType, Align Alignment);
  bool isLegalMaskedScatter(Type *DataType, Align Alignment);
  bool isLegalMaskedExpandLoad(Type *DataType);
  bool isLegalMaskedCompressStore(Type *DataType);
  bool hasDivRemOp(Type *DataType, bool IsSigned);
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty);
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;
  bool areFunctionArgsABICompatible(const Function *Caller,
                                    const Function *Callee,
                                    SmallPtrSetImpl<Argument *> &Args) const;
  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const;
  bool enableInterleavedAccessVectorization();

private:
  // Gather/scatter costing helpers. NOTE(review): presumably the scalarized
  // and vector paths behind getGatherScatterOpCost -- definitions are not in
  // this header, confirm in the implementation file.
  InstructionCost getGSScalarCost(unsigned Opcode, Type *DataTy,
                                  bool VariableMask, Align Alignment,
                                  unsigned AddressSpace);
  InstructionCost getGSVectorCost(unsigned Opcode, Type *DataTy,
                                  const Value *Ptr, Align Alignment,
                                  unsigned AddressSpace);

  int getGatherOverhead() const;
  int getScatterOverhead() const;

  /// @}
};
|
|
|
|
} // end namespace llvm
|
|
|
|
#endif
|