mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-19 11:02:59 +02:00
[AMDGPU] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).
llvm-svn: 310429
This commit is contained in:
parent
3f63039f98
commit
3ed10e19e4
@ -1,4 +1,4 @@
|
||||
//===- AMDGPUAliasAnalysis ---------------------------------------*- C++ -*-==//
|
||||
//===- AMDGPUAliasAnalysis ------------------------------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
@ -12,13 +12,21 @@
|
||||
|
||||
#include "AMDGPUAliasAnalysis.h"
|
||||
#include "AMDGPU.h"
|
||||
#include "llvm/ADT/Triple.h"
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Analysis/Passes.h"
|
||||
#include "llvm/Analysis/MemoryLocation.h"
|
||||
#include "llvm/Analysis/ValueTracking.h"
|
||||
#include "llvm/IR/Argument.h"
|
||||
#include "llvm/IR/Attributes.h"
|
||||
#include "llvm/IR/CallingConv.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/IR/GlobalVariable.h"
|
||||
#include "llvm/IR/Type.h"
|
||||
#include "llvm/IR/Value.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Support/Casting.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include <cassert>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
@ -26,6 +34,7 @@ using namespace llvm;
|
||||
|
||||
// Register this pass...
|
||||
char AMDGPUAAWrapperPass::ID = 0;
|
||||
|
||||
INITIALIZE_PASS(AMDGPUAAWrapperPass, "amdgpu-aa",
|
||||
"AMDGPU Address space based Alias Analysis", false, true)
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
//===- AMDGPUAliasAnalysis ---------------------------------------*- C++ -*-==//
|
||||
//===- AMDGPUAliasAnalysis --------------------------------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
@ -10,17 +10,24 @@
|
||||
/// This is the AMDGPU address space based alias analysis pass.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H
|
||||
#define LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H
|
||||
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUALIASANALYSIS_H
|
||||
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUALIASANALYSIS_H
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "llvm/ADT/Triple.h"
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class DataLayout;
|
||||
class MDNode;
|
||||
class MemoryLocation;
|
||||
|
||||
/// A simple AA result that uses address space information to answer queries.
|
||||
class AMDGPUAAResult : public AAResultBase<AMDGPUAAResult> {
|
||||
friend AAResultBase<AMDGPUAAResult>;
|
||||
@ -50,7 +57,9 @@ private:
|
||||
class ASAliasRulesTy {
|
||||
public:
|
||||
ASAliasRulesTy(AMDGPUAS AS_, Triple::ArchType Arch_);
|
||||
|
||||
AliasResult getAliasResult(unsigned AS1, unsigned AS2) const;
|
||||
|
||||
private:
|
||||
Triple::ArchType Arch;
|
||||
AMDGPUAS AS;
|
||||
@ -61,10 +70,11 @@ private:
|
||||
/// Analysis pass providing a never-invalidated alias analysis result.
|
||||
class AMDGPUAA : public AnalysisInfoMixin<AMDGPUAA> {
|
||||
friend AnalysisInfoMixin<AMDGPUAA>;
|
||||
|
||||
static char PassID;
|
||||
|
||||
public:
|
||||
typedef AMDGPUAAResult Result;
|
||||
using Result = AMDGPUAAResult;
|
||||
|
||||
AMDGPUAAResult run(Function &F, AnalysisManager<Function> &AM) {
|
||||
return AMDGPUAAResult(F.getParent()->getDataLayout(),
|
||||
@ -91,12 +101,15 @@ public:
|
||||
Triple(M.getTargetTriple())));
|
||||
return false;
|
||||
}
|
||||
|
||||
bool doFinalization(Module &M) override {
|
||||
Result.reset();
|
||||
return false;
|
||||
}
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override;
|
||||
};
|
||||
|
||||
}
|
||||
#endif // LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUALIASANALYSIS_H
|
||||
|
@ -14,46 +14,55 @@
|
||||
#include "AMDGPU.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include "SIInstrInfo.h"
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/DenseSet.h"
|
||||
#include "llvm/ADT/PostOrderIterator.h"
|
||||
#include "llvm/ADT/SetVector.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/Analysis/CFG.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstr.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineOperand.h"
|
||||
#include "llvm/CodeGen/MachineRegionInfo.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/Passes.h"
|
||||
#include "llvm/IR/DebugLoc.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Support/Compiler.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Target/TargetInstrInfo.h"
|
||||
#include "llvm/Target/TargetLowering.h"
|
||||
#include "llvm/Target/TargetSubtargetInfo.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Target/TargetOpcodes.h"
|
||||
#include "llvm/Target/TargetRegisterInfo.h"
|
||||
#include <cassert>
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "amdgpucfgstructurizer"
|
||||
|
||||
namespace {
|
||||
|
||||
class PHILinearizeDestIterator;
|
||||
|
||||
class PHILinearize {
|
||||
friend class PHILinearizeDestIterator;
|
||||
|
||||
public:
|
||||
typedef std::pair<unsigned, MachineBasicBlock *> PHISourceT;
|
||||
using PHISourceT = std::pair<unsigned, MachineBasicBlock *>;
|
||||
|
||||
private:
|
||||
typedef DenseSet<PHISourceT> PHISourcesT;
|
||||
typedef struct {
|
||||
using PHISourcesT = DenseSet<PHISourceT>;
|
||||
using PHIInfoElementT = struct {
|
||||
unsigned DestReg;
|
||||
DebugLoc DL;
|
||||
PHISourcesT Sources;
|
||||
} PHIInfoElementT;
|
||||
typedef SmallPtrSet<PHIInfoElementT *, 2> PHIInfoT;
|
||||
};
|
||||
using PHIInfoT = SmallPtrSet<PHIInfoElementT *, 2>;
|
||||
PHIInfoT PHIInfo;
|
||||
|
||||
static unsigned phiInfoElementGetDest(PHIInfoElementT *Info);
|
||||
@ -85,8 +94,8 @@ public:
|
||||
void dump(MachineRegisterInfo *MRI);
|
||||
void clear();
|
||||
|
||||
typedef PHISourcesT::iterator source_iterator;
|
||||
typedef PHILinearizeDestIterator dest_iterator;
|
||||
using source_iterator = PHISourcesT::iterator;
|
||||
using dest_iterator = PHILinearizeDestIterator;
|
||||
|
||||
dest_iterator dests_begin();
|
||||
dest_iterator dests_end();
|
||||
@ -100,6 +109,8 @@ private:
|
||||
PHILinearize::PHIInfoT::iterator Iter;
|
||||
|
||||
public:
|
||||
PHILinearizeDestIterator(PHILinearize::PHIInfoT::iterator I) : Iter(I) {}
|
||||
|
||||
unsigned operator*() { return PHILinearize::phiInfoElementGetDest(*Iter); }
|
||||
PHILinearizeDestIterator &operator++() {
|
||||
++Iter;
|
||||
@ -111,10 +122,10 @@ public:
|
||||
bool operator!=(const PHILinearizeDestIterator &I) const {
|
||||
return I.Iter != Iter;
|
||||
}
|
||||
|
||||
PHILinearizeDestIterator(PHILinearize::PHIInfoT::iterator I) : Iter(I) {}
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
unsigned PHILinearize::phiInfoElementGetDest(PHIInfoElementT *Info) {
|
||||
return Info->DestReg;
|
||||
}
|
||||
@ -282,14 +293,12 @@ PHILinearize::source_iterator PHILinearize::sources_begin(unsigned Reg) {
|
||||
auto InfoElement = findPHIInfoElement(Reg);
|
||||
return phiInfoElementGetSources(InfoElement).begin();
|
||||
}
|
||||
|
||||
PHILinearize::source_iterator PHILinearize::sources_end(unsigned Reg) {
|
||||
auto InfoElement = findPHIInfoElement(Reg);
|
||||
return phiInfoElementGetSources(InfoElement).end();
|
||||
}
|
||||
|
||||
class RegionMRT;
|
||||
class MBBMRT;
|
||||
|
||||
static unsigned getPHINumInputs(MachineInstr &PHI) {
|
||||
assert(PHI.isPHI());
|
||||
return (PHI.getNumOperands() - 1) / 2;
|
||||
@ -315,6 +324,11 @@ static unsigned getPHIDestReg(MachineInstr &PHI) {
|
||||
return PHI.getOperand(0).getReg();
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
class RegionMRT;
|
||||
class MBBMRT;
|
||||
|
||||
class LinearizedRegion {
|
||||
protected:
|
||||
MachineBasicBlock *Entry;
|
||||
@ -349,6 +363,11 @@ protected:
|
||||
RegionMRT *TopRegion = nullptr);
|
||||
|
||||
public:
|
||||
LinearizedRegion();
|
||||
LinearizedRegion(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI,
|
||||
const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
|
||||
~LinearizedRegion() = default;
|
||||
|
||||
void setRegionMRT(RegionMRT *Region) { RMRT = Region; }
|
||||
|
||||
RegionMRT *getRegionMRT() { return RMRT; }
|
||||
@ -413,13 +432,6 @@ public:
|
||||
|
||||
void initLiveOut(RegionMRT *Region, const MachineRegisterInfo *MRI,
|
||||
const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
|
||||
|
||||
LinearizedRegion(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI,
|
||||
const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
|
||||
|
||||
LinearizedRegion();
|
||||
|
||||
~LinearizedRegion();
|
||||
};
|
||||
|
||||
class MRT {
|
||||
@ -429,6 +441,8 @@ protected:
|
||||
unsigned BBSelectRegOut;
|
||||
|
||||
public:
|
||||
virtual ~MRT() = default;
|
||||
|
||||
unsigned getBBSelectRegIn() { return BBSelectRegIn; }
|
||||
|
||||
unsigned getBBSelectRegOut() { return BBSelectRegOut; }
|
||||
@ -467,42 +481,55 @@ public:
|
||||
dbgs() << " ";
|
||||
}
|
||||
}
|
||||
|
||||
virtual ~MRT() {}
|
||||
};
|
||||
|
||||
class MBBMRT : public MRT {
|
||||
MachineBasicBlock *MBB;
|
||||
|
||||
public:
|
||||
virtual MBBMRT *getMBBMRT() { return this; }
|
||||
|
||||
MachineBasicBlock *getMBB() { return MBB; }
|
||||
|
||||
virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) {
|
||||
dumpDepth(depth);
|
||||
dbgs() << "MBB: " << getMBB()->getNumber();
|
||||
dbgs() << " In: " << PrintReg(getBBSelectRegIn(), TRI);
|
||||
dbgs() << ", Out: " << PrintReg(getBBSelectRegOut(), TRI) << "\n";
|
||||
}
|
||||
|
||||
MBBMRT(MachineBasicBlock *BB) : MBB(BB) {
|
||||
setParent(nullptr);
|
||||
setBBSelectRegOut(0);
|
||||
setBBSelectRegIn(0);
|
||||
}
|
||||
|
||||
MBBMRT *getMBBMRT() override { return this; }
|
||||
|
||||
MachineBasicBlock *getMBB() { return MBB; }
|
||||
|
||||
void dump(const TargetRegisterInfo *TRI, int depth = 0) override {
|
||||
dumpDepth(depth);
|
||||
dbgs() << "MBB: " << getMBB()->getNumber();
|
||||
dbgs() << " In: " << PrintReg(getBBSelectRegIn(), TRI);
|
||||
dbgs() << ", Out: " << PrintReg(getBBSelectRegOut(), TRI) << "\n";
|
||||
}
|
||||
};
|
||||
|
||||
class RegionMRT : public MRT {
|
||||
protected:
|
||||
MachineRegion *Region;
|
||||
LinearizedRegion *LRegion;
|
||||
MachineBasicBlock *Succ;
|
||||
|
||||
LinearizedRegion *LRegion = nullptr;
|
||||
MachineBasicBlock *Succ = nullptr;
|
||||
SetVector<MRT *> Children;
|
||||
|
||||
public:
|
||||
virtual RegionMRT *getRegionMRT() { return this; }
|
||||
RegionMRT(MachineRegion *MachineRegion) : Region(MachineRegion) {
|
||||
setParent(nullptr);
|
||||
setBBSelectRegOut(0);
|
||||
setBBSelectRegIn(0);
|
||||
}
|
||||
|
||||
~RegionMRT() override {
|
||||
if (LRegion) {
|
||||
delete LRegion;
|
||||
}
|
||||
|
||||
for (auto CI : Children) {
|
||||
delete &(*CI);
|
||||
}
|
||||
}
|
||||
|
||||
RegionMRT *getRegionMRT() override { return this; }
|
||||
|
||||
void setLinearizedRegion(LinearizedRegion *LinearizeRegion) {
|
||||
LRegion = LinearizeRegion;
|
||||
@ -520,7 +547,7 @@ public:
|
||||
|
||||
SetVector<MRT *> *getChildren() { return &Children; }
|
||||
|
||||
virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) {
|
||||
void dump(const TargetRegisterInfo *TRI, int depth = 0) override {
|
||||
dumpDepth(depth);
|
||||
dbgs() << "Region: " << (void *)Region;
|
||||
dbgs() << " In: " << PrintReg(getBBSelectRegIn(), TRI);
|
||||
@ -583,25 +610,10 @@ public:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
RegionMRT(MachineRegion *MachineRegion)
|
||||
: Region(MachineRegion), LRegion(nullptr), Succ(nullptr) {
|
||||
setParent(nullptr);
|
||||
setBBSelectRegOut(0);
|
||||
setBBSelectRegIn(0);
|
||||
}
|
||||
|
||||
virtual ~RegionMRT() {
|
||||
if (LRegion) {
|
||||
delete LRegion;
|
||||
}
|
||||
|
||||
for (auto CI : Children) {
|
||||
delete &(*CI);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
static unsigned createBBSelectReg(const SIInstrInfo *TII,
|
||||
MachineRegisterInfo *MRI) {
|
||||
return MRI->createVirtualRegister(TII->getPreferredSelectRegClass(32));
|
||||
@ -1063,7 +1075,7 @@ LinearizedRegion::LinearizedRegion() {
|
||||
Parent = nullptr;
|
||||
}
|
||||
|
||||
LinearizedRegion::~LinearizedRegion() {}
|
||||
namespace {
|
||||
|
||||
class AMDGPUMachineCFGStructurizer : public MachineFunctionPass {
|
||||
private:
|
||||
@ -1074,6 +1086,7 @@ private:
|
||||
unsigned BBSelectRegister;
|
||||
PHILinearize PHIInfo;
|
||||
DenseMap<MachineBasicBlock *, MachineBasicBlock *> FallthroughMap;
|
||||
RegionMRT *RMRT;
|
||||
|
||||
void getPHIRegionIndices(RegionMRT *Region, MachineInstr &PHI,
|
||||
SmallVector<unsigned, 2> &RegionIndices);
|
||||
@ -1197,15 +1210,15 @@ private:
|
||||
public:
|
||||
static char ID;
|
||||
|
||||
AMDGPUMachineCFGStructurizer() : MachineFunctionPass(ID) {
|
||||
initializeAMDGPUMachineCFGStructurizerPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.addRequired<MachineRegionInfoPass>();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
|
||||
AMDGPUMachineCFGStructurizer() : MachineFunctionPass(ID) {
|
||||
initializeAMDGPUMachineCFGStructurizerPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
void initFallthroughMap(MachineFunction &MF);
|
||||
|
||||
void createLinearizedRegion(RegionMRT *Region, unsigned SelectOut);
|
||||
@ -1214,14 +1227,14 @@ public:
|
||||
MachineRegisterInfo *MRI,
|
||||
const SIInstrInfo *TII);
|
||||
|
||||
RegionMRT *RMRT;
|
||||
void setRegionMRT(RegionMRT *RegionTree) { RMRT = RegionTree; }
|
||||
|
||||
RegionMRT *getRegionMRT() { return RMRT; }
|
||||
|
||||
bool runOnMachineFunction(MachineFunction &MF) override;
|
||||
};
|
||||
}
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
char AMDGPUMachineCFGStructurizer::ID = 0;
|
||||
|
||||
@ -1258,7 +1271,6 @@ void AMDGPUMachineCFGStructurizer::transformSimpleIfRegion(RegionMRT *Region) {
|
||||
}
|
||||
|
||||
static void fixMBBTerminator(MachineBasicBlock *MBB) {
|
||||
|
||||
if (MBB->succ_size() == 1) {
|
||||
auto *Succ = *(MBB->succ_begin());
|
||||
for (auto &TI : MBB->terminators()) {
|
||||
@ -1535,7 +1547,6 @@ void AMDGPUMachineCFGStructurizer::replacePHI(
|
||||
void AMDGPUMachineCFGStructurizer::replaceEntryPHI(
|
||||
MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *IfMBB,
|
||||
SmallVector<unsigned, 2> &PHIRegionIndices) {
|
||||
|
||||
DEBUG(dbgs() << "Replace entry PHI: ");
|
||||
DEBUG(PHI.dump());
|
||||
DEBUG(dbgs() << " with ");
|
||||
@ -2491,7 +2502,6 @@ AMDGPUMachineCFGStructurizer::splitExit(LinearizedRegion *LRegion) {
|
||||
return NewExit;
|
||||
}
|
||||
|
||||
|
||||
static MachineBasicBlock *split(MachineBasicBlock::iterator I) {
|
||||
// Create the fall-through block.
|
||||
MachineBasicBlock *MBB = (*I).getParent();
|
||||
@ -2845,16 +2855,6 @@ static void checkRegOnlyPHIInputs(MachineFunction &MF) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
INITIALIZE_PASS_BEGIN(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer",
|
||||
"AMDGPU Machine CFG Structurizer", false, false)
|
||||
INITIALIZE_PASS_DEPENDENCY(MachineRegionInfoPass)
|
||||
INITIALIZE_PASS_END(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer",
|
||||
"AMDGPU Machine CFG Structurizer", false, false)
|
||||
|
||||
char AMDGPUMachineCFGStructurizerID = AMDGPUMachineCFGStructurizer::ID;
|
||||
|
||||
|
||||
bool AMDGPUMachineCFGStructurizer::runOnMachineFunction(MachineFunction &MF) {
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
@ -2880,6 +2880,14 @@ bool AMDGPUMachineCFGStructurizer::runOnMachineFunction(MachineFunction &MF) {
|
||||
return result;
|
||||
}
|
||||
|
||||
char AMDGPUMachineCFGStructurizerID = AMDGPUMachineCFGStructurizer::ID;
|
||||
|
||||
INITIALIZE_PASS_BEGIN(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer",
|
||||
"AMDGPU Machine CFG Structurizer", false, false)
|
||||
INITIALIZE_PASS_DEPENDENCY(MachineRegionInfoPass)
|
||||
INITIALIZE_PASS_END(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer",
|
||||
"AMDGPU Machine CFG Structurizer", false, false)
|
||||
|
||||
FunctionPass *llvm::createAMDGPUMachineCFGStructurizerPass() {
|
||||
return new AMDGPUMachineCFGStructurizer();
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
//===-- AMDGPUOpenCLImageTypeLoweringPass.cpp -----------------------------===//
|
||||
//===- AMDGPUOpenCLImageTypeLoweringPass.cpp ------------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
@ -22,40 +22,57 @@
|
||||
/// Resource IDs of read-only images, write-only images and samplers are
|
||||
/// defined to be their index among the kernel arguments of the same
|
||||
/// type and access qualifier.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/Analysis/Passes.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/ADT/Twine.h"
|
||||
#include "llvm/IR/Argument.h"
|
||||
#include "llvm/IR/DerivedTypes.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/Instruction.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/Metadata.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/IR/Type.h"
|
||||
#include "llvm/IR/Use.h"
|
||||
#include "llvm/IR/User.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Support/Casting.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Transforms/Utils/Cloning.h"
|
||||
#include "llvm/Transforms/Utils/ValueMapper.h"
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <tuple>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
static StringRef GetImageSizeFunc = "llvm.OpenCL.image.get.size";
|
||||
static StringRef GetImageFormatFunc = "llvm.OpenCL.image.get.format";
|
||||
static StringRef GetImageResourceIDFunc = "llvm.OpenCL.image.get.resource.id";
|
||||
static StringRef GetSamplerResourceIDFunc =
|
||||
"llvm.OpenCL.sampler.get.resource.id";
|
||||
|
||||
StringRef GetImageSizeFunc = "llvm.OpenCL.image.get.size";
|
||||
StringRef GetImageFormatFunc = "llvm.OpenCL.image.get.format";
|
||||
StringRef GetImageResourceIDFunc = "llvm.OpenCL.image.get.resource.id";
|
||||
StringRef GetSamplerResourceIDFunc = "llvm.OpenCL.sampler.get.resource.id";
|
||||
static StringRef ImageSizeArgMDType = "__llvm_image_size";
|
||||
static StringRef ImageFormatArgMDType = "__llvm_image_format";
|
||||
|
||||
StringRef ImageSizeArgMDType = "__llvm_image_size";
|
||||
StringRef ImageFormatArgMDType = "__llvm_image_format";
|
||||
|
||||
StringRef KernelsMDNodeName = "opencl.kernels";
|
||||
StringRef KernelArgMDNodeNames[] = {
|
||||
static StringRef KernelsMDNodeName = "opencl.kernels";
|
||||
static StringRef KernelArgMDNodeNames[] = {
|
||||
"kernel_arg_addr_space",
|
||||
"kernel_arg_access_qual",
|
||||
"kernel_arg_type",
|
||||
"kernel_arg_base_type",
|
||||
"kernel_arg_type_qual"};
|
||||
const unsigned NumKernelArgMDNodes = 5;
|
||||
static const unsigned NumKernelArgMDNodes = 5;
|
||||
|
||||
typedef SmallVector<Metadata *, 8> MDVector;
|
||||
namespace {
|
||||
|
||||
using MDVector = SmallVector<Metadata *, 8>;
|
||||
struct KernelArgMD {
|
||||
MDVector ArgVector[NumKernelArgMDNodes];
|
||||
};
|
||||
@ -303,7 +320,7 @@ class AMDGPUOpenCLImageTypeLoweringPass : public ModulePass {
|
||||
CloneFunctionInto(NewF, F, VMap, /*ModuleLevelChanges=*/false, Returns);
|
||||
|
||||
// Build new MDNode.
|
||||
SmallVector<llvm::Metadata *, 6> KernelMDArgs;
|
||||
SmallVector<Metadata *, 6> KernelMDArgs;
|
||||
KernelMDArgs.push_back(ConstantAsMetadata::get(NewF));
|
||||
for (unsigned i = 0; i < NumKernelArgMDNodes; ++i)
|
||||
KernelMDArgs.push_back(MDNode::get(*Context, NewArgMDs.ArgVector[i]));
|
||||
@ -346,7 +363,7 @@ class AMDGPUOpenCLImageTypeLoweringPass : public ModulePass {
|
||||
return Modified;
|
||||
}
|
||||
|
||||
public:
|
||||
public:
|
||||
AMDGPUOpenCLImageTypeLoweringPass() : ModulePass(ID) {}
|
||||
|
||||
bool runOnModule(Module &M) override {
|
||||
@ -363,10 +380,10 @@ class AMDGPUOpenCLImageTypeLoweringPass : public ModulePass {
|
||||
}
|
||||
};
|
||||
|
||||
char AMDGPUOpenCLImageTypeLoweringPass::ID = 0;
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
char AMDGPUOpenCLImageTypeLoweringPass::ID = 0;
|
||||
|
||||
ModulePass *llvm::createAMDGPUOpenCLImageTypeLoweringPass() {
|
||||
return new AMDGPUOpenCLImageTypeLoweringPass();
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
//===-- AMDGPURewriteOutArgumentsPass.cpp - Create struct returns ---------===//
|
||||
//===- AMDGPURewriteOutArgumentsPass.cpp - Create struct returns ----------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
@ -44,22 +44,39 @@
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "Utils/AMDGPUBaseInfo.h"
|
||||
|
||||
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
|
||||
#include "llvm/ADT/BitVector.h"
|
||||
#include "llvm/ADT/SetVector.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ADT/SmallSet.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/MemoryLocation.h"
|
||||
#include "llvm/IR/Argument.h"
|
||||
#include "llvm/IR/Attributes.h"
|
||||
#include "llvm/IR/BasicBlock.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
#include "llvm/IR/DerivedTypes.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/Transforms/Utils/Cloning.h"
|
||||
#include "llvm/IR/Type.h"
|
||||
#include "llvm/IR/Use.h"
|
||||
#include "llvm/IR/User.h"
|
||||
#include "llvm/IR/Value.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Support/Casting.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include <cassert>
|
||||
#include <utility>
|
||||
|
||||
#define DEBUG_TYPE "amdgpu-rewrite-out-arguments"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
static cl::opt<bool> AnyAddressSpace(
|
||||
"amdgpu-any-address-space-out-arguments",
|
||||
cl::desc("Replace pointer out arguments with "
|
||||
@ -78,6 +95,8 @@ STATISTIC(NumOutArgumentsReplaced,
|
||||
STATISTIC(NumOutArgumentFunctionsReplaced,
|
||||
"Number of functions with out arguments moved to struct return values");
|
||||
|
||||
namespace {
|
||||
|
||||
class AMDGPURewriteOutArguments : public FunctionPass {
|
||||
private:
|
||||
const DataLayout *DL = nullptr;
|
||||
@ -89,11 +108,11 @@ private:
|
||||
#ifndef NDEBUG
|
||||
bool isVec3ToVec4Shuffle(Type *Ty0, Type* Ty1) const;
|
||||
#endif
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
|
||||
AMDGPURewriteOutArguments() :
|
||||
FunctionPass(ID) {}
|
||||
AMDGPURewriteOutArguments() : FunctionPass(ID) {}
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.addRequired<MemoryDependenceWrapperPass>();
|
||||
@ -101,10 +120,10 @@ public:
|
||||
}
|
||||
|
||||
bool doInitialization(Module &M) override;
|
||||
bool runOnFunction(Function &M) override;
|
||||
bool runOnFunction(Function &F) override;
|
||||
};
|
||||
|
||||
} // End anonymous namespace
|
||||
} // end anonymous namespace
|
||||
|
||||
INITIALIZE_PASS_BEGIN(AMDGPURewriteOutArguments, DEBUG_TYPE,
|
||||
"AMDGPU Rewrite Out Arguments", false, false)
|
||||
@ -239,7 +258,8 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
|
||||
if (OutArgs.empty())
|
||||
return false;
|
||||
|
||||
typedef SmallVector<std::pair<Argument *, Value *>, 4> ReplacementVec;
|
||||
using ReplacementVec = SmallVector<std::pair<Argument *, Value *>, 4>;
|
||||
|
||||
DenseMap<ReturnInst *, ReplacementVec> Replacements;
|
||||
|
||||
SmallVector<ReturnInst *, 4> Returns;
|
||||
@ -373,7 +393,6 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
|
||||
if (RetVal)
|
||||
NewRetVal = B.CreateInsertValue(NewRetVal, RetVal, RetIdx++);
|
||||
|
||||
|
||||
for (std::pair<Argument *, Value *> ReturnPoint : Replacement.second) {
|
||||
Argument *Arg = ReturnPoint.first;
|
||||
Value *Val = ReturnPoint.second;
|
||||
|
@ -1,4 +1,4 @@
|
||||
//===-- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass ---------===//
|
||||
//===- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
@ -16,15 +16,39 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPUTargetTransformInfo.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/Analysis/ValueTracking.h"
|
||||
#include "llvm/CodeGen/BasicTTIImpl.h"
|
||||
#include "llvm/IR/Intrinsics.h"
|
||||
#include "llvm/CodeGen/ISDOpcodes.h"
|
||||
#include "llvm/CodeGen/MachineValueType.h"
|
||||
#include "llvm/CodeGen/ValueTypes.h"
|
||||
#include "llvm/IR/Argument.h"
|
||||
#include "llvm/IR/Attributes.h"
|
||||
#include "llvm/IR/BasicBlock.h"
|
||||
#include "llvm/IR/CallingConv.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
#include "llvm/IR/DerivedTypes.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/Instruction.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/IntrinsicInst.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/IR/Type.h"
|
||||
#include "llvm/IR/Value.h"
|
||||
#include "llvm/MC/SubtargetFeature.h"
|
||||
#include "llvm/Support/Casting.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Target/CostTable.h"
|
||||
#include "llvm/Target/TargetLowering.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <limits>
|
||||
#include <utility>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "AMDGPUtti"
|
||||
@ -54,7 +78,7 @@ static bool dependsOnLocalPhi(const Loop *L, const Value *Cond,
|
||||
if (!L->contains(I))
|
||||
continue;
|
||||
if (const PHINode *PHI = dyn_cast<PHINode>(V)) {
|
||||
if (none_of(L->getSubLoops(), [PHI](const Loop* SubLoop) {
|
||||
if (llvm::none_of(L->getSubLoops(), [PHI](const Loop* SubLoop) {
|
||||
return SubLoop->contains(PHI); }))
|
||||
return true;
|
||||
} else if (Depth < 10 && dependsOnLocalPhi(L, V, Depth+1))
|
||||
@ -66,7 +90,7 @@ static bool dependsOnLocalPhi(const Loop *L, const Value *Cond,
|
||||
void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
TTI::UnrollingPreferences &UP) {
|
||||
UP.Threshold = 300; // Twice the default.
|
||||
UP.MaxCount = UINT_MAX;
|
||||
UP.MaxCount = std::numeric_limits<unsigned>::max();
|
||||
UP.Partial = true;
|
||||
|
||||
// TODO: Do we want runtime unrolling?
|
||||
@ -81,12 +105,11 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
const DataLayout &DL = BB->getModule()->getDataLayout();
|
||||
unsigned LocalGEPsSeen = 0;
|
||||
|
||||
if (any_of(L->getSubLoops(), [BB](const Loop* SubLoop) {
|
||||
if (llvm::any_of(L->getSubLoops(), [BB](const Loop* SubLoop) {
|
||||
return SubLoop->contains(BB); }))
|
||||
continue; // Block belongs to an inner loop.
|
||||
|
||||
for (const Instruction &I : *BB) {
|
||||
|
||||
// Unroll a loop which contains an "if" statement whose condition
|
||||
// defined by a PHI belonging to the loop. This may help to eliminate
|
||||
// if region and potentially even PHI itself, saving on both divergence
|
||||
@ -153,7 +176,7 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
if (!Inst || L->isLoopInvariant(Op))
|
||||
continue;
|
||||
|
||||
if (any_of(L->getSubLoops(), [Inst](const Loop* SubLoop) {
|
||||
if (llvm::any_of(L->getSubLoops(), [Inst](const Loop* SubLoop) {
|
||||
return SubLoop->contains(Inst); }))
|
||||
continue;
|
||||
HasLoopDef = true;
|
||||
@ -268,7 +291,6 @@ int AMDGPUTTIImpl::getArithmeticInstrCost(
|
||||
unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
|
||||
TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
|
||||
TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args ) {
|
||||
|
||||
EVT OrigTy = TLI->getValueType(DL, Ty);
|
||||
if (!OrigTy.isSimple()) {
|
||||
return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
|
||||
@ -289,25 +311,23 @@ int AMDGPUTTIImpl::getArithmeticInstrCost(
|
||||
switch (ISD) {
|
||||
case ISD::SHL:
|
||||
case ISD::SRL:
|
||||
case ISD::SRA: {
|
||||
case ISD::SRA:
|
||||
if (SLT == MVT::i64)
|
||||
return get64BitInstrCost() * LT.first * NElts;
|
||||
|
||||
// i32
|
||||
return getFullRateInstrCost() * LT.first * NElts;
|
||||
}
|
||||
case ISD::ADD:
|
||||
case ISD::SUB:
|
||||
case ISD::AND:
|
||||
case ISD::OR:
|
||||
case ISD::XOR: {
|
||||
case ISD::XOR:
|
||||
if (SLT == MVT::i64){
|
||||
// and, or and xor are typically split into 2 VALU instructions.
|
||||
return 2 * getFullRateInstrCost() * LT.first * NElts;
|
||||
}
|
||||
|
||||
return LT.first * NElts * getFullRateInstrCost();
|
||||
}
|
||||
case ISD::MUL: {
|
||||
const int QuarterRateCost = getQuarterRateInstrCost();
|
||||
if (SLT == MVT::i64) {
|
||||
@ -327,7 +347,6 @@ int AMDGPUTTIImpl::getArithmeticInstrCost(
|
||||
if (SLT == MVT::f32 || SLT == MVT::f16)
|
||||
return LT.first * NElts * getFullRateInstrCost();
|
||||
break;
|
||||
|
||||
case ISD::FDIV:
|
||||
case ISD::FREM:
|
||||
// FIXME: frem should be handled separately. The fdiv in it is most of it,
|
||||
@ -348,7 +367,6 @@ int AMDGPUTTIImpl::getArithmeticInstrCost(
|
||||
int Cost = 7 * getFullRateInstrCost() + 1 * getQuarterRateInstrCost();
|
||||
return LT.first * NElts * Cost;
|
||||
}
|
||||
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
@ -465,11 +483,9 @@ static bool isArgPassedInSGPR(const Argument *A) {
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
/// \returns true if the result of the value could potentially be
|
||||
/// different across workitems in a wavefront.
|
||||
bool AMDGPUTTIImpl::isSourceOfDivergence(const Value *V) const {
|
||||
|
||||
if (const Argument *A = dyn_cast<Argument>(V))
|
||||
return !isArgPassedInSGPR(A);
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
//===-- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI -------*- C++ -*-===//
|
||||
//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
@ -6,35 +6,48 @@
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
/// This file a TargetTransformInfo::Concept conforming object specific to the
|
||||
/// AMDGPU target machine. It uses the target's detailed information to
|
||||
/// provide more precise answers to certain TTI queries, while letting the
|
||||
/// target independent and default TTI implementations handle the rest.
|
||||
///
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
|
||||
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include "AMDGPUTargetMachine.h"
|
||||
#include "Utils/AMDGPUBaseInfo.h"
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/CodeGen/BasicTTIImpl.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/MC/SubtargetFeature.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
#include <cassert>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class AMDGPUTargetLowering;
|
||||
class Loop;
|
||||
class ScalarEvolution;
|
||||
class Type;
|
||||
class Value;
|
||||
|
||||
class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
|
||||
typedef BasicTTIImplBase<AMDGPUTTIImpl> BaseT;
|
||||
typedef TargetTransformInfo TTI;
|
||||
using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
|
||||
using TTI = TargetTransformInfo;
|
||||
|
||||
friend BaseT;
|
||||
|
||||
const AMDGPUSubtarget *ST;
|
||||
const AMDGPUTargetLowering *TLI;
|
||||
bool IsGraphicsShader;
|
||||
|
||||
|
||||
const FeatureBitset InlineFeatureIgnoreList = {
|
||||
// Codegen control options which don't matter.
|
||||
AMDGPU::FeatureEnableLoadStoreOpt,
|
||||
@ -63,7 +76,6 @@ class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
|
||||
const AMDGPUSubtarget *getST() const { return ST; }
|
||||
const AMDGPUTargetLowering *getTLI() const { return TLI; }
|
||||
|
||||
|
||||
static inline int getFullRateInstrCost() {
|
||||
return TargetTransformInfo::TCC_Basic;
|
||||
}
|
||||
@ -104,7 +116,7 @@ public:
|
||||
|
||||
unsigned getHardwareNumberOfRegisters(bool Vector) const;
|
||||
unsigned getNumberOfRegisters(bool Vector) const;
|
||||
unsigned getRegisterBitWidth(bool Vector) const ;
|
||||
unsigned getRegisterBitWidth(bool Vector) const;
|
||||
unsigned getMinVectorRegisterBitWidth() const;
|
||||
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
|
||||
|
||||
@ -154,4 +166,4 @@ public:
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif
|
||||
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
|
||||
|
@ -1,11 +1,10 @@
|
||||
//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
|
||||
//===- AMDILCFGStructurizer.cpp - CFG Structurizer ------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
/// \file
|
||||
//==-----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
@ -67,7 +66,7 @@ STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
|
||||
|
||||
namespace llvm {
|
||||
|
||||
void initializeAMDGPUCFGStructurizerPass(PassRegistry&);
|
||||
void initializeAMDGPUCFGStructurizerPass(PassRegistry &);
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
@ -121,9 +120,9 @@ public:
|
||||
|
||||
class AMDGPUCFGStructurizer : public MachineFunctionPass {
|
||||
public:
|
||||
typedef SmallVector<MachineBasicBlock *, 32> MBBVector;
|
||||
typedef std::map<MachineBasicBlock *, BlockInformation *> MBBInfoMap;
|
||||
typedef std::map<MachineLoop *, MachineBasicBlock *> LoopLandInfoMap;
|
||||
using MBBVector = SmallVector<MachineBasicBlock *, 32>;
|
||||
using MBBInfoMap = std::map<MachineBasicBlock *, BlockInformation *>;
|
||||
using LoopLandInfoMap = std::map<MachineLoop *, MachineBasicBlock *>;
|
||||
|
||||
enum PathToKind {
|
||||
Not_SinglePath = 0,
|
||||
@ -234,6 +233,7 @@ protected:
|
||||
void insertCondBranchBefore(MachineBasicBlock *MBB,
|
||||
MachineBasicBlock::iterator I, int NewOpcode,
|
||||
int RegNum, const DebugLoc &DL);
|
||||
|
||||
static int getBranchNzeroOpcode(int OldOpcode);
|
||||
static int getBranchZeroOpcode(int OldOpcode);
|
||||
static int getContinueNzeroOpcode(int OldOpcode);
|
||||
@ -246,21 +246,25 @@ protected:
|
||||
static bool isUncondBranch(MachineInstr *MI);
|
||||
static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB);
|
||||
static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB);
|
||||
|
||||
/// The correct naming for this is getPossibleLoopendBlockBranchInstr.
|
||||
///
|
||||
/// BB with backward-edge could have move instructions after the branch
|
||||
/// instruction. Such move instruction "belong to" the loop backward-edge.
|
||||
MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB);
|
||||
|
||||
static MachineInstr *getReturnInstr(MachineBasicBlock *MBB);
|
||||
static bool isReturnBlock(MachineBasicBlock *MBB);
|
||||
static void cloneSuccessorList(MachineBasicBlock *DstMBB,
|
||||
MachineBasicBlock *SrcMBB) ;
|
||||
MachineBasicBlock *SrcMBB);
|
||||
static MachineBasicBlock *clone(MachineBasicBlock *MBB);
|
||||
|
||||
/// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose
|
||||
/// because the AMDGPU instruction is not recognized as terminator fix this
|
||||
/// and retire this routine
|
||||
void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB,
|
||||
MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk);
|
||||
|
||||
static void wrapup(MachineBasicBlock *MBB);
|
||||
|
||||
int patternMatch(MachineBasicBlock *MBB);
|
||||
@ -299,6 +303,7 @@ protected:
|
||||
MachineBasicBlock *LandMBB);
|
||||
void settleLoopcontBlock(MachineBasicBlock *ContingMBB,
|
||||
MachineBasicBlock *ContMBB);
|
||||
|
||||
/// normalizeInfiniteLoopExit change
|
||||
/// B1:
|
||||
/// uncond_br LoopHeader
|
||||
@ -309,6 +314,7 @@ protected:
|
||||
/// and return the newly added dummy exit block
|
||||
MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep);
|
||||
void removeUnconditionalBranch(MachineBasicBlock *MBB);
|
||||
|
||||
/// Remove duplicate branches instructions in a block.
|
||||
/// For instance
|
||||
/// B0:
|
||||
@ -318,6 +324,7 @@ protected:
|
||||
/// B0:
|
||||
/// cond_br X B1 B2
|
||||
void removeRedundantConditionalBranch(MachineBasicBlock *MBB);
|
||||
|
||||
void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB);
|
||||
void removeSuccessor(MachineBasicBlock *MBB);
|
||||
MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB,
|
||||
@ -335,10 +342,10 @@ private:
|
||||
SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks;
|
||||
};
|
||||
|
||||
char AMDGPUCFGStructurizer::ID = 0;
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
char AMDGPUCFGStructurizer::ID = 0;
|
||||
|
||||
int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
|
||||
MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
|
||||
if (It == BlockInfoMap.end())
|
||||
@ -535,7 +542,7 @@ int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
|
||||
case AMDGPU::JUMP_COND:
|
||||
case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
|
||||
default: llvm_unreachable("internal error");
|
||||
};
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -1597,7 +1604,7 @@ void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
|
||||
MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB);
|
||||
if (!BranchMI) {
|
||||
DEBUG(
|
||||
dbgs() << "migrateInstruction don't see branch instr\n" ;
|
||||
dbgs() << "migrateInstruction don't see branch instr\n";
|
||||
);
|
||||
SpliceEnd = SrcMBB->end();
|
||||
} else {
|
||||
|
@ -1,4 +1,4 @@
|
||||
//===-- AMDGPUKernelCodeT.h - Print AMDGPU assembly code ---------*- C++ -*-===//
|
||||
//===- AMDGPUKernelCodeT.h - Print AMDGPU assembly code ---------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
@ -6,39 +6,33 @@
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
/// \file AMDKernelCodeT.h
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef AMDKERNELCODET_H
|
||||
#define AMDKERNELCODET_H
|
||||
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDKERNELCODET_H
|
||||
#define LLVM_LIB_TARGET_AMDGPU_AMDKERNELCODET_H
|
||||
|
||||
#include "llvm/MC/SubtargetFeature.h"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
#include "llvm/Support/Debug.h"
|
||||
//---------------------------------------------------------------------------//
|
||||
// AMD Kernel Code, and its dependencies //
|
||||
//---------------------------------------------------------------------------//
|
||||
|
||||
typedef uint8_t hsa_powertwo8_t;
|
||||
typedef uint32_t hsa_ext_code_kind_t;
|
||||
typedef uint8_t hsa_ext_brig_profile8_t;
|
||||
typedef uint8_t hsa_ext_brig_machine_model8_t;
|
||||
typedef uint64_t hsa_ext_control_directive_present64_t;
|
||||
typedef uint16_t hsa_ext_exception_kind16_t;
|
||||
typedef uint32_t hsa_ext_code_kind32_t;
|
||||
using hsa_powertwo8_t = uint8_t;
|
||||
using hsa_ext_code_kind_t = uint32_t;
|
||||
using hsa_ext_brig_profile8_t = uint8_t;
|
||||
using hsa_ext_brig_machine_model8_t = uint8_t;
|
||||
using hsa_ext_control_directive_present64_t = uint64_t;
|
||||
using hsa_ext_exception_kind16_t = uint16_t;
|
||||
using hsa_ext_code_kind32_t = uint32_t;
|
||||
|
||||
typedef struct hsa_dim3_s {
|
||||
using hsa_dim3_t = struct {
|
||||
uint32_t x;
|
||||
uint32_t y;
|
||||
uint32_t z;
|
||||
} hsa_dim3_t;
|
||||
};
|
||||
|
||||
/// The version of the amd_*_code_t struct. Minor versions must be
|
||||
/// backward compatible.
|
||||
typedef uint32_t amd_code_version32_t;
|
||||
using amd_code_version32_t = uint32_t;
|
||||
enum amd_code_version_t {
|
||||
AMD_CODE_VERSION_MAJOR = 0,
|
||||
AMD_CODE_VERSION_MINOR = 1
|
||||
@ -64,7 +58,7 @@ enum amd_element_byte_size_t {
|
||||
|
||||
/// Shader program settings for CS. Contains COMPUTE_PGM_RSRC1 and
|
||||
/// COMPUTE_PGM_RSRC2 registers.
|
||||
typedef uint64_t amd_compute_pgm_resource_register64_t;
|
||||
using amd_compute_pgm_resource_register64_t = uint64_t;
|
||||
|
||||
/// Every amd_*_code_t has the following properties, which are composed of
|
||||
/// a number of bit fields. Every bit field has a mask (AMD_CODE_PROPERTY_*),
|
||||
@ -74,9 +68,8 @@ typedef uint64_t amd_compute_pgm_resource_register64_t;
|
||||
/// (Note that bit fields cannot be used as their layout is
|
||||
/// implementation defined in the C standard and so cannot be used to
|
||||
/// specify an ABI)
|
||||
typedef uint32_t amd_code_property32_t;
|
||||
using amd_code_property32_t = uint32_t;
|
||||
enum amd_code_property_mask_t {
|
||||
|
||||
/// Enable the setup of the SGPR user data registers
|
||||
/// (AMD_CODE_PROPERTY_ENABLE_SGPR_*), see documentation of amd_kernel_code_t
|
||||
/// for initial register state.
|
||||
@ -207,7 +200,7 @@ enum amd_code_property_mask_t {
|
||||
/// directives. See the definition of the control directives in HSA Programmer's
|
||||
/// Reference Manual which also defines how the values specified as finalizer
|
||||
/// arguments have to agree with the control directives in the HSAIL code.
|
||||
typedef struct hsa_ext_control_directives_s {
|
||||
using hsa_ext_control_directives_t = struct {
|
||||
/// This is a bit set indicating which control directives have been
|
||||
/// specified. If the value is 0 then there are no control directives specified
|
||||
/// and the rest of the fields can be ignored. The bits are accessed using the
|
||||
@ -312,7 +305,7 @@ typedef struct hsa_ext_control_directives_s {
|
||||
|
||||
/// Reserved. Must be 0.
|
||||
uint8_t reserved[75];
|
||||
} hsa_ext_control_directives_t;
|
||||
};
|
||||
|
||||
/// AMD Kernel Code Object (amd_kernel_code_t). GPU CP uses the AMD Kernel
|
||||
/// Code Object to set up the hardware to execute the kernel dispatch.
|
||||
@ -522,9 +515,8 @@ typedef struct hsa_ext_control_directives_s {
|
||||
/// dispatch packet kernArgPtr to a kernarg segment address before using this V#.
|
||||
/// Alternatively scalar loads can be used if the kernarg offset is uniform, as
|
||||
/// the kernarg segment is constant for the duration of the kernel execution.
|
||||
///
|
||||
|
||||
typedef struct amd_kernel_code_s {
|
||||
using amd_kernel_code_t = struct {
|
||||
uint32_t amd_kernel_code_version_major;
|
||||
uint32_t amd_kernel_code_version_minor;
|
||||
uint16_t amd_machine_kind;
|
||||
@ -653,6 +645,6 @@ typedef struct amd_kernel_code_s {
|
||||
uint8_t reserved3[12];
|
||||
uint64_t runtime_loader_kernel_symbol;
|
||||
uint64_t control_directives[16];
|
||||
} amd_kernel_code_t;
|
||||
};
|
||||
|
||||
#endif // AMDKERNELCODET_H
|
||||
#endif // LLVM_LIB_TARGET_AMDGPU_AMDKERNELCODET_H
|
||||
|
@ -1,4 +1,4 @@
|
||||
//===----------------------- GCNMinRegStrategy.cpp - ----------------------===//
|
||||
//===- GCNMinRegStrategy.cpp ----------------------------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
@ -6,18 +6,27 @@
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/ilist_node.h"
|
||||
#include "llvm/ADT/simple_ilist.h"
|
||||
#include "llvm/CodeGen/ScheduleDAG.h"
|
||||
#include "llvm/Support/Allocator.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "machine-scheduler"
|
||||
|
||||
namespace {
|
||||
|
||||
class GCNMinRegScheduler {
|
||||
struct Candidate : ilist_node<Candidate> {
|
||||
const SUnit *SU;
|
||||
@ -28,7 +37,7 @@ class GCNMinRegScheduler {
|
||||
};
|
||||
|
||||
SpecificBumpPtrAllocator<Candidate> Alloc;
|
||||
typedef simple_ilist<Candidate> Queue;
|
||||
using Queue = simple_ilist<Candidate>;
|
||||
Queue RQ; // Ready queue
|
||||
|
||||
std::vector<unsigned> NumPreds;
|
||||
@ -72,7 +81,8 @@ public:
|
||||
std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots,
|
||||
const ScheduleDAG &DAG);
|
||||
};
|
||||
} // namespace
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
void GCNMinRegScheduler::initNumPreds(const decltype(ScheduleDAG::SUnits) &SUnits) {
|
||||
NumPreds.resize(SUnits.size());
|
||||
@ -104,7 +114,9 @@ int GCNMinRegScheduler::getNotReadySuccessors(const SUnit *SU) const {
|
||||
template <typename Calc>
|
||||
unsigned GCNMinRegScheduler::findMax(unsigned Num, Calc C) {
|
||||
assert(!RQ.empty() && Num <= RQ.size());
|
||||
typedef decltype(C(*RQ.begin())) T;
|
||||
|
||||
using T = decltype(C(*RQ.begin())) ;
|
||||
|
||||
T Max = std::numeric_limits<T>::min();
|
||||
unsigned NumMax = 0;
|
||||
for (auto I = RQ.begin(); Num; --Num) {
|
||||
@ -260,9 +272,11 @@ GCNMinRegScheduler::schedule(ArrayRef<const SUnit*> TopRoots,
|
||||
}
|
||||
|
||||
namespace llvm {
|
||||
|
||||
std::vector<const SUnit*> makeMinRegSchedule(ArrayRef<const SUnit*> TopRoots,
|
||||
const ScheduleDAG &DAG) {
|
||||
GCNMinRegScheduler S;
|
||||
return S.schedule(TopRoots, DAG);
|
||||
}
|
||||
}
|
||||
|
||||
} // end namespace llvm
|
||||
|
Loading…
Reference in New Issue
Block a user