1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[AArch64] Add pass LoopDataPrefetch for Cyclone

Summary:
This wires up the pass for Cyclone but keeps it off for now because we
need a few more TTIs.

The getPrefetchMinStride value is not very well tuned right now but it
works well with CFP2006/433.milc which motivated this.

Tests will be added as part of the upcoming large-stride prefetching
patch.

Reviewers: t.p.northover

Subscribers: llvm-commits, aemerson, hfinkel, rengolin

Differential Revision: http://reviews.llvm.org/D17943

llvm-svn: 263770
This commit is contained in:
Adam Nemet 2016-03-18 00:27:29 +00:00
parent c99e69af78
commit 504bc5d49e
3 changed files with 34 additions and 0 deletions

View File

@ -101,6 +101,11 @@ static cl::opt<cl::boolOrDefault>
EnableGlobalMerge("aarch64-global-merge", cl::Hidden,
cl::desc("Enable the global merge pass"));
// Hidden command-line switch "aarch64-loop-data-prefetch". It is initialized
// to false, so the loop data prefetch pass is only added to the IR pipeline
// (see AArch64PassConfig::addIRPasses) when the flag is explicitly enabled.
static cl::opt<bool>
EnableLoopDataPrefetch("aarch64-loop-data-prefetch", cl::Hidden,
cl::desc("Enable the loop data prefetch pass"),
cl::init(false));
extern "C" void LLVMInitializeAArch64Target() {
// Register the target.
RegisterTargetMachine<AArch64leTargetMachine> X(TheAArch64leTarget);
@ -236,6 +241,14 @@ void AArch64PassConfig::addIRPasses() {
if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
addPass(createCFGSimplificationPass());
// Run LoopDataPrefetch for Cyclone (the only subtarget that defines a
// non-zero getPrefetchDistance).
//
// Run this before LSR so that LSR can remove the multiplies involved in
// computing the pointer values N iterations ahead.
if (TM->getOptLevel() != CodeGenOpt::None && EnableLoopDataPrefetch)
addPass(createLoopDataPrefetchPass());
TargetPassConfig::addIRPasses();
// Match interleaved memory accesses to ldN/stN intrinsics.

View File

@ -20,6 +20,11 @@ using namespace llvm;
#define DEBUG_TYPE "aarch64tti"
// Hidden command-line knob "cyclone-prefetch-distance" (initial value 280).
// Its value is what AArch64TTIImpl::getPrefetchDistance() returns when the
// subtarget is Cyclone; per the option description it is measured in
// instructions.
static cl::opt<unsigned> CyclonePrefetchDistance(
"cyclone-prefetch-distance",
cl::desc("Number of instructions to prefetch ahead for Cyclone"),
cl::init(280), cl::Hidden);
/// \brief Calculate the cost of materializing a 64-bit value. This helper
/// method might only calculate a fraction of a larger immediate. Therefore it
/// is valid to return a cost of ZERO.
@ -573,3 +578,15 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
}
return true;
}
/// Report the cache line size used for prefetching decisions: a fixed 64 for
/// Cyclone, and whatever the base TTI implementation reports for every other
/// subtarget.
unsigned AArch64TTIImpl::getCacheLineSize() {
  return ST->isCyclone() ? 64 : BaseT::getCacheLineSize();
}
/// Report how far ahead to prefetch. Non-Cyclone subtargets defer to the base
/// TTI implementation; Cyclone uses the value of the
/// "cyclone-prefetch-distance" command-line option.
unsigned AArch64TTIImpl::getPrefetchDistance() {
  // Anything that is not Cyclone keeps the generic default.
  if (!ST->isCyclone())
    return BaseT::getPrefetchDistance();

  return CyclonePrefetchDistance;
}

View File

@ -127,6 +127,10 @@ public:
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, unsigned Alignment,
unsigned AddressSpace);
/// Returns 64 when the subtarget is Cyclone; otherwise forwards to the base
/// TTI implementation.
unsigned getCacheLineSize();
/// Returns the "cyclone-prefetch-distance" option value when the subtarget is
/// Cyclone; otherwise forwards to the base TTI implementation.
unsigned getPrefetchDistance();
/// @}
};