llvm-mirror/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
Matt Arsenault 6b427f77ee LoadStoreVectorizer: Split even sized illegal chains properly
Implement isLegalToVectorizeLoadChain for AMDGPU to avoid
producing private address space accesses that will need to be
split up later. The chain-splitting logic was doing the wrong thing
when the queried chain had an even number of elements.

A possible <4 x i32> store was being split into
store <2 x i32>
store i32
store i32

rather than
store <2 x i32>
store <2 x i32>

when legal.

llvm-svn: 295933
2017-02-23 03:58:53 +00:00
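
The fix is easiest to see as a splitting policy. Below is a minimal
sketch, not the actual LoadStoreVectorizer code: the helpers isLegalChain
and splitPoint and the "fits in one vector register" legality rule are
assumptions for illustration. The point is that the first piece is the
largest legal power-of-two element count, so both pieces of an even chain
stay vectorizable.

#include <cassert>

// Hypothetical stand-in for a target hook such as
// TTI::isLegalToVectorizeLoadChain(): assume a chain is legal when it
// fits in one vector register.
static bool isLegalChain(unsigned ChainSizeInBytes, unsigned VecRegBits) {
  return ChainSizeInBytes * 8 <= VecRegBits;
}

// Number of elements to take for the first piece of an illegal chain.
// Splitting at the largest legal power-of-two boundary keeps both pieces
// vectorizable, so a 4 x i32 chain becomes 2 + 2 rather than 2 + 1 + 1.
static unsigned splitPoint(unsigned NumElts, unsigned EltBytes,
                           unsigned VecRegBits) {
  assert(NumElts > 1 && "nothing to split");
  unsigned N = NumElts;
  while (N & (N - 1)) // round down to a power of two
    N &= N - 1;       // clear the lowest set bit
  while (N > 1 && !isLegalChain(N * EltBytes, VecRegBits))
    N /= 2;
  return N;
}

With NumElts = 4, EltBytes = 4, and 64-bit vector registers, splitPoint
returns 2; applying it again to the two-element remainder yields the
store <2 x i32> + store <2 x i32> split shown above.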

//===-- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file declares a TargetTransformInfo::Concept conforming object
/// specific to the AMDGPU target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target-independent and default TTI implementations handle the
/// rest.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"

namespace llvm {

class AMDGPUTargetLowering;

class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
  typedef BasicTTIImplBase<AMDGPUTTIImpl> BaseT;
  typedef TargetTransformInfo TTI;
  friend BaseT;

  const AMDGPUSubtarget *ST;
  const AMDGPUTargetLowering *TLI;
  bool IsGraphicsShader;

  const AMDGPUSubtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  static inline int getFullRateInstrCost() {
    return TargetTransformInfo::TCC_Basic;
  }

  static inline int getHalfRateInstrCost() {
    return 2 * TargetTransformInfo::TCC_Basic;
  }

  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
  // this should be 2 or 4.
  static inline int getQuarterRateInstrCost() {
    return 3 * TargetTransformInfo::TCC_Basic;
  }

  // On some parts normal fp64 operations are half rate, on others quarter
  // rate. This also applies to some integer operations.
  inline int get64BitInstrCost() const {
    return ST->hasHalfRate64Ops() ?
      getHalfRateInstrCost() : getQuarterRateInstrCost();
  }
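
  // Summary of the cost model above: TCC_Basic is TTI's unit cost for one
  // simple instruction, so these helpers scale cost with issue rate:
  // full-rate ops cost one unit, half-rate ops two, and quarter-rate ops
  // three (a compromise; the TODO above notes 2 or 4 may be more accurate).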

public:
  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(TM->getSubtargetImpl(F)),
      TLI(ST->getTargetLowering()),
      IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}

  bool hasBranchDivergence() { return true; }

  void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);

  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
    return TTI::PSK_FastHardware;
  }

  unsigned getNumberOfRegisters(bool Vector);
  unsigned getRegisterBitWidth(bool Vector);
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
                                  unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;

  unsigned getMaxInterleaveFactor(unsigned VF);

  int getArithmeticInstrCost(
    unsigned Opcode, Type *Ty,
    TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
    TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
    TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
    TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
    ArrayRef<const Value *> Args = ArrayRef<const Value *>());

  unsigned getCFInstrCost(unsigned Opcode);

  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
  bool isSourceOfDivergence(const Value *V) const;

  unsigned getFlatAddressSpace() const {
    // Don't bother running InferAddressSpaces pass on graphics shaders which
    // don't use flat addressing.
    if (IsGraphicsShader)
      return -1;
    return ST->hasFlatAddressSpace() ?
      AMDGPUAS::FLAT_ADDRESS : AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
  }

  unsigned getVectorSplitCost() { return 0; }
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
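
For context, a transform pass reaches these hooks through the generic
TargetTransformInfo interface rather than through AMDGPUTTIImpl directly.
A minimal sketch of that pattern follows, using the legacy pass API of
this era; the pass itself and the query values are illustrative, not part
of this header.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"

using namespace llvm;

namespace {
// Demo-only pass: asks the target whether a 16-byte, 4-byte-aligned load
// chain in address space 0 may be vectorized as a single access.
struct ChainLegalityDemo : public FunctionPass {
  static char ID;
  ChainLegalityDemo() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.setPreservesAll();
  }

  bool runOnFunction(Function &F) override {
    const TargetTransformInfo &TTI =
        getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    bool Legal = TTI.isLegalToVectorizeLoadChain(/*ChainSizeInBytes=*/16,
                                                 /*Alignment=*/4,
                                                 /*AddrSpace=*/0);
    (void)Legal; // a real client would split the chain when !Legal
    return false;
  }
};
} // end anonymous namespace

char ChainLegalityDemo::ID = 0;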