Mirror of https://github.com/RPCS3/llvm-mirror.git (synced 2024-11-23 03:02:36 +01:00)

commit 8980610845
For a long time, the InstCombine pass handled target-specific intrinsics, and keeping target-specific code in general passes had long been noted as an area for improvement. D81728 moves most target-specific code out of the InstCombine pass.

Applying the target-specific combines in an extra pass would likely result in inferior optimizations compared to the current fixed-point iteration, so the InstCombine pass instead calls into newly introduced TargetTransformInfo functions when it encounters unknown intrinsics. The patch should not have any effect on generated code (under the assumption that code never uses intrinsics from a foreign target).

This introduces three new functions:

  TargetTransformInfo::instCombineIntrinsic
  TargetTransformInfo::simplifyDemandedUseBitsIntrinsic
  TargetTransformInfo::simplifyDemandedVectorEltsIntrinsic

A few target-specific parts are left in the InstCombine folder where it makes sense to share code; the largest leftover in InstCombineCalls.cpp is the code shared between ARM and AArch64. This allows moving about 3,000 lines out of InstCombine and into the targets.

Differential Revision: https://reviews.llvm.org/D81728
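As a rough sketch of the mechanism (this is not code from the patch; the target name and the intrinsic below are made up for illustration, and only the hook's signature matches the instCombineIntrinsic declaration in the header that follows), a target's TTI implementation can now fold its own intrinsics when InstCombine calls back into it:

    #include "llvm/ADT/Optional.h"
    #include "llvm/IR/IntrinsicInst.h"
    #include "llvm/Transforms/InstCombine/InstCombiner.h"
    using namespace llvm;

    // Hypothetical target hook: fold an intrinsic that is documented to
    // return its first operand unchanged. Returning None tells InstCombine
    // that no target-specific combine applies and generic handling suffices.
    Optional<Instruction *>
    MyTargetTTIImpl::instCombineIntrinsic(InstCombiner &IC,
                                          IntrinsicInst &II) const {
      switch (II.getIntrinsicID()) {
      default:
        return None;
      case Intrinsic::mytarget_identity: // hypothetical intrinsic
        // Replace all uses of the call with its first argument; InstCombine
        // erases the now-dead call as part of its fixed-point iteration.
        return IC.replaceInstUsesWith(II, II.getArgOperand(0));
      }
    }

Because the hook runs inside InstCombine's fixed-point iteration, any further folds exposed by such a combine are picked up in the same pass, which is the advantage over running the target-specific combines as an extra pass.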
126 lines · 4.8 KiB · C++
//===-- NVPTXTargetTransformInfo.h - NVPTX specific TTI ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file describes a TargetTransformInfo::Concept conforming object
/// specific to the NVPTX target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target independent and default TTI implementations handle the
/// rest.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H

#include "NVPTXTargetMachine.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/TargetLowering.h"

namespace llvm {

class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
  typedef BasicTTIImplBase<NVPTXTTIImpl> BaseT;
  typedef TargetTransformInfo TTI;
  friend BaseT;

  const NVPTXSubtarget *ST;
  const NVPTXTargetLowering *TLI;

  const NVPTXSubtarget *getST() const { return ST; }
  const NVPTXTargetLowering *getTLI() const { return TLI; }

public:
  explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl()),
        TLI(ST->getTargetLowering()) {}

  bool hasBranchDivergence() { return true; }

  bool isSourceOfDivergence(const Value *V);

  unsigned getFlatAddressSpace() const {
    return AddressSpace::ADDRESS_SPACE_GENERIC;
  }

  // Target-specific InstCombine hook introduced by D81728 (see the commit
  // message above): lets InstCombine fold NVPTX intrinsics that it would
  // otherwise treat as opaque calls.
  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) const;

  // Loads and stores can be vectorized if the alignment is at least as big as
  // the load/store we want to vectorize.
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const {
    return Alignment >= ChainSizeInBytes;
  }
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const {
    return isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, AddrSpace);
  }

  // NVPTX has infinite registers of all kinds, but the actual machine doesn't.
  // We conservatively return 1 here which is just enough to enable the
  // vectorizers but disables heuristics based on the number of registers.
  // FIXME: Return a more reasonable number, while keeping an eye on
  // LoopVectorizer's unrolling heuristics.
  unsigned getNumberOfRegisters(bool Vector) const { return 1; }

  // Only <2 x half> should be vectorized, so always return 32 for the vector
  // register size.
  unsigned getRegisterBitWidth(bool Vector) const { return 32; }
  unsigned getMinVectorRegisterBitWidth() const { return 32; }

  // We don't want to prevent inlining because of target-cpu and -features
  // attributes that were added to newer versions of LLVM/Clang: there are
  // no incompatible functions in PTX; ptxas will throw errors in such cases.
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const {
    return true;
  }

  // Increase the inlining cost threshold by a factor of 5, reflecting that
  // calls are particularly expensive in NVPTX.
  unsigned getInliningThresholdMultiplier() { return 5; }

  int getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) {
    // Volatile loads/stores are only supported for shared and global address
    // spaces, or for generic AS that maps to them.
    if (!(AddrSpace == llvm::ADDRESS_SPACE_GENERIC ||
          AddrSpace == llvm::ADDRESS_SPACE_GLOBAL ||
          AddrSpace == llvm::ADDRESS_SPACE_SHARED))
      return false;

    switch (I->getOpcode()) {
    default:
      return false;
    case Instruction::Load:
    case Instruction::Store:
      return true;
    }
  }
};

} // end namespace llvm

#endif
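For context, this implementation typically reaches the optimizer through the target machine's getTargetTransformInfo callback; a minimal sketch following the usual LLVM pattern (such a definition belongs in NVPTXTargetMachine.cpp, not in this header):

    // Hand each function a TargetTransformInfo wrapping NVPTXTTIImpl, so
    // passes such as InstCombine query the NVPTX-specific answers above.
    TargetTransformInfo
    NVPTXTargetMachine::getTargetTransformInfo(const Function &F) {
      return TargetTransformInfo(NVPTXTTIImpl(this, F));
    }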