1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

[AMDGPU] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 310429
This commit is contained in:
Eugene Zelenko 2017-08-08 23:53:55 +00:00
parent 3f63039f98
commit 3ed10e19e4
10 changed files with 303 additions and 196 deletions

View File

@ -1,4 +1,4 @@
//===- AMDGPUAliasAnalysis ---------------------------------------*- C++ -*-==//
//===- AMDGPUAliasAnalysis ------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@ -12,13 +12,21 @@
#include "AMDGPUAliasAnalysis.h"
#include "AMDGPU.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
using namespace llvm;
@ -26,6 +34,7 @@ using namespace llvm;
// Register this pass...
char AMDGPUAAWrapperPass::ID = 0;
INITIALIZE_PASS(AMDGPUAAWrapperPass, "amdgpu-aa",
"AMDGPU Address space based Alias Analysis", false, true)

View File

@ -1,4 +1,4 @@
//===- AMDGPUAliasAnalysis ---------------------------------------*- C++ -*-==//
//===- AMDGPUAliasAnalysis --------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@ -10,17 +10,24 @@
/// This is the AMGPU address space based alias analysis pass.
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H
#define LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUALIASANALYSIS_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUALIASANALYSIS_H
#include "AMDGPU.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include <algorithm>
#include <memory>
namespace llvm {
class DataLayout;
class MDNode;
class MemoryLocation;
/// A simple AA result that uses TBAA metadata to answer queries.
class AMDGPUAAResult : public AAResultBase<AMDGPUAAResult> {
friend AAResultBase<AMDGPUAAResult>;
@ -50,7 +57,9 @@ private:
class ASAliasRulesTy {
public:
ASAliasRulesTy(AMDGPUAS AS_, Triple::ArchType Arch_);
AliasResult getAliasResult(unsigned AS1, unsigned AS2) const;
private:
Triple::ArchType Arch;
AMDGPUAS AS;
@ -61,10 +70,11 @@ private:
/// Analysis pass providing a never-invalidated alias analysis result.
class AMDGPUAA : public AnalysisInfoMixin<AMDGPUAA> {
friend AnalysisInfoMixin<AMDGPUAA>;
static char PassID;
public:
typedef AMDGPUAAResult Result;
using Result = AMDGPUAAResult;
AMDGPUAAResult run(Function &F, AnalysisManager<Function> &AM) {
return AMDGPUAAResult(F.getParent()->getDataLayout(),
@ -91,12 +101,15 @@ public:
Triple(M.getTargetTriple())));
return false;
}
bool doFinalization(Module &M) override {
Result.reset();
return false;
}
void getAnalysisUsage(AnalysisUsage &AU) const override;
};
}
#endif // LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H
} // end namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUALIASANALYSIS_H

View File

@ -14,46 +14,55 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegionInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/Pass.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOpcodes.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <cassert>
#include <tuple>
#include <utility>
using namespace llvm;
#define DEBUG_TYPE "amdgpucfgstructurizer"
namespace {
class PHILinearizeDestIterator;
class PHILinearize {
friend class PHILinearizeDestIterator;
public:
typedef std::pair<unsigned, MachineBasicBlock *> PHISourceT;
using PHISourceT = std::pair<unsigned, MachineBasicBlock *>;
private:
typedef DenseSet<PHISourceT> PHISourcesT;
typedef struct {
using PHISourcesT = DenseSet<PHISourceT>;
using PHIInfoElementT = struct {
unsigned DestReg;
DebugLoc DL;
PHISourcesT Sources;
} PHIInfoElementT;
typedef SmallPtrSet<PHIInfoElementT *, 2> PHIInfoT;
};
using PHIInfoT = SmallPtrSet<PHIInfoElementT *, 2>;
PHIInfoT PHIInfo;
static unsigned phiInfoElementGetDest(PHIInfoElementT *Info);
@ -85,8 +94,8 @@ public:
void dump(MachineRegisterInfo *MRI);
void clear();
typedef PHISourcesT::iterator source_iterator;
typedef PHILinearizeDestIterator dest_iterator;
using source_iterator = PHISourcesT::iterator;
using dest_iterator = PHILinearizeDestIterator;
dest_iterator dests_begin();
dest_iterator dests_end();
@ -100,6 +109,8 @@ private:
PHILinearize::PHIInfoT::iterator Iter;
public:
PHILinearizeDestIterator(PHILinearize::PHIInfoT::iterator I) : Iter(I) {}
unsigned operator*() { return PHILinearize::phiInfoElementGetDest(*Iter); }
PHILinearizeDestIterator &operator++() {
++Iter;
@ -111,10 +122,10 @@ public:
bool operator!=(const PHILinearizeDestIterator &I) const {
return I.Iter != Iter;
}
PHILinearizeDestIterator(PHILinearize::PHIInfoT::iterator I) : Iter(I) {}
};
} // end anonymous namespace
unsigned PHILinearize::phiInfoElementGetDest(PHIInfoElementT *Info) {
return Info->DestReg;
}
@ -282,14 +293,12 @@ PHILinearize::source_iterator PHILinearize::sources_begin(unsigned Reg) {
auto InfoElement = findPHIInfoElement(Reg);
return phiInfoElementGetSources(InfoElement).begin();
}
PHILinearize::source_iterator PHILinearize::sources_end(unsigned Reg) {
auto InfoElement = findPHIInfoElement(Reg);
return phiInfoElementGetSources(InfoElement).end();
}
class RegionMRT;
class MBBMRT;
static unsigned getPHINumInputs(MachineInstr &PHI) {
assert(PHI.isPHI());
return (PHI.getNumOperands() - 1) / 2;
@ -315,6 +324,11 @@ static unsigned getPHIDestReg(MachineInstr &PHI) {
return PHI.getOperand(0).getReg();
}
namespace {
class RegionMRT;
class MBBMRT;
class LinearizedRegion {
protected:
MachineBasicBlock *Entry;
@ -349,6 +363,11 @@ protected:
RegionMRT *TopRegion = nullptr);
public:
LinearizedRegion();
LinearizedRegion(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI,
const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
~LinearizedRegion() = default;
void setRegionMRT(RegionMRT *Region) { RMRT = Region; }
RegionMRT *getRegionMRT() { return RMRT; }
@ -413,13 +432,6 @@ public:
void initLiveOut(RegionMRT *Region, const MachineRegisterInfo *MRI,
const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
LinearizedRegion(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI,
const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
LinearizedRegion();
~LinearizedRegion();
};
class MRT {
@ -429,6 +441,8 @@ protected:
unsigned BBSelectRegOut;
public:
virtual ~MRT() = default;
unsigned getBBSelectRegIn() { return BBSelectRegIn; }
unsigned getBBSelectRegOut() { return BBSelectRegOut; }
@ -467,42 +481,55 @@ public:
dbgs() << " ";
}
}
virtual ~MRT() {}
};
class MBBMRT : public MRT {
MachineBasicBlock *MBB;
public:
virtual MBBMRT *getMBBMRT() { return this; }
MachineBasicBlock *getMBB() { return MBB; }
virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) {
dumpDepth(depth);
dbgs() << "MBB: " << getMBB()->getNumber();
dbgs() << " In: " << PrintReg(getBBSelectRegIn(), TRI);
dbgs() << ", Out: " << PrintReg(getBBSelectRegOut(), TRI) << "\n";
}
MBBMRT(MachineBasicBlock *BB) : MBB(BB) {
setParent(nullptr);
setBBSelectRegOut(0);
setBBSelectRegIn(0);
}
MBBMRT *getMBBMRT() override { return this; }
MachineBasicBlock *getMBB() { return MBB; }
void dump(const TargetRegisterInfo *TRI, int depth = 0) override {
dumpDepth(depth);
dbgs() << "MBB: " << getMBB()->getNumber();
dbgs() << " In: " << PrintReg(getBBSelectRegIn(), TRI);
dbgs() << ", Out: " << PrintReg(getBBSelectRegOut(), TRI) << "\n";
}
};
class RegionMRT : public MRT {
protected:
MachineRegion *Region;
LinearizedRegion *LRegion;
MachineBasicBlock *Succ;
LinearizedRegion *LRegion = nullptr;
MachineBasicBlock *Succ = nullptr;
SetVector<MRT *> Children;
public:
virtual RegionMRT *getRegionMRT() { return this; }
RegionMRT(MachineRegion *MachineRegion) : Region(MachineRegion) {
setParent(nullptr);
setBBSelectRegOut(0);
setBBSelectRegIn(0);
}
~RegionMRT() override {
if (LRegion) {
delete LRegion;
}
for (auto CI : Children) {
delete &(*CI);
}
}
RegionMRT *getRegionMRT() override { return this; }
void setLinearizedRegion(LinearizedRegion *LinearizeRegion) {
LRegion = LinearizeRegion;
@ -520,7 +547,7 @@ public:
SetVector<MRT *> *getChildren() { return &Children; }
virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) {
void dump(const TargetRegisterInfo *TRI, int depth = 0) override {
dumpDepth(depth);
dbgs() << "Region: " << (void *)Region;
dbgs() << " In: " << PrintReg(getBBSelectRegIn(), TRI);
@ -583,25 +610,10 @@ public:
}
}
}
RegionMRT(MachineRegion *MachineRegion)
: Region(MachineRegion), LRegion(nullptr), Succ(nullptr) {
setParent(nullptr);
setBBSelectRegOut(0);
setBBSelectRegIn(0);
}
virtual ~RegionMRT() {
if (LRegion) {
delete LRegion;
}
for (auto CI : Children) {
delete &(*CI);
}
}
};
} // end anonymous namespace
static unsigned createBBSelectReg(const SIInstrInfo *TII,
MachineRegisterInfo *MRI) {
return MRI->createVirtualRegister(TII->getPreferredSelectRegClass(32));
@ -1063,7 +1075,7 @@ LinearizedRegion::LinearizedRegion() {
Parent = nullptr;
}
LinearizedRegion::~LinearizedRegion() {}
namespace {
class AMDGPUMachineCFGStructurizer : public MachineFunctionPass {
private:
@ -1074,6 +1086,7 @@ private:
unsigned BBSelectRegister;
PHILinearize PHIInfo;
DenseMap<MachineBasicBlock *, MachineBasicBlock *> FallthroughMap;
RegionMRT *RMRT;
void getPHIRegionIndices(RegionMRT *Region, MachineInstr &PHI,
SmallVector<unsigned, 2> &RegionIndices);
@ -1197,15 +1210,15 @@ private:
public:
static char ID;
AMDGPUMachineCFGStructurizer() : MachineFunctionPass(ID) {
initializeAMDGPUMachineCFGStructurizerPass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineRegionInfoPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
AMDGPUMachineCFGStructurizer() : MachineFunctionPass(ID) {
initializeAMDGPUMachineCFGStructurizerPass(*PassRegistry::getPassRegistry());
}
void initFallthroughMap(MachineFunction &MF);
void createLinearizedRegion(RegionMRT *Region, unsigned SelectOut);
@ -1214,14 +1227,14 @@ public:
MachineRegisterInfo *MRI,
const SIInstrInfo *TII);
RegionMRT *RMRT;
void setRegionMRT(RegionMRT *RegionTree) { RMRT = RegionTree; }
RegionMRT *getRegionMRT() { return RMRT; }
bool runOnMachineFunction(MachineFunction &MF) override;
};
}
} // end anonymous namespace
char AMDGPUMachineCFGStructurizer::ID = 0;
@ -1258,7 +1271,6 @@ void AMDGPUMachineCFGStructurizer::transformSimpleIfRegion(RegionMRT *Region) {
}
static void fixMBBTerminator(MachineBasicBlock *MBB) {
if (MBB->succ_size() == 1) {
auto *Succ = *(MBB->succ_begin());
for (auto &TI : MBB->terminators()) {
@ -1535,7 +1547,6 @@ void AMDGPUMachineCFGStructurizer::replacePHI(
void AMDGPUMachineCFGStructurizer::replaceEntryPHI(
MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *IfMBB,
SmallVector<unsigned, 2> &PHIRegionIndices) {
DEBUG(dbgs() << "Replace entry PHI: ");
DEBUG(PHI.dump());
DEBUG(dbgs() << " with ");
@ -2491,7 +2502,6 @@ AMDGPUMachineCFGStructurizer::splitExit(LinearizedRegion *LRegion) {
return NewExit;
}
static MachineBasicBlock *split(MachineBasicBlock::iterator I) {
// Create the fall-through block.
MachineBasicBlock *MBB = (*I).getParent();
@ -2845,16 +2855,6 @@ static void checkRegOnlyPHIInputs(MachineFunction &MF) {
}
}
INITIALIZE_PASS_BEGIN(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer",
"AMDGPU Machine CFG Structurizer", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineRegionInfoPass)
INITIALIZE_PASS_END(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer",
"AMDGPU Machine CFG Structurizer", false, false)
char AMDGPUMachineCFGStructurizerID = AMDGPUMachineCFGStructurizer::ID;
bool AMDGPUMachineCFGStructurizer::runOnMachineFunction(MachineFunction &MF) {
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
@ -2880,6 +2880,14 @@ bool AMDGPUMachineCFGStructurizer::runOnMachineFunction(MachineFunction &MF) {
return result;
}
char AMDGPUMachineCFGStructurizerID = AMDGPUMachineCFGStructurizer::ID;
INITIALIZE_PASS_BEGIN(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer",
"AMDGPU Machine CFG Structurizer", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineRegionInfoPass)
INITIALIZE_PASS_END(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer",
"AMDGPU Machine CFG Structurizer", false, false)
FunctionPass *llvm::createAMDGPUMachineCFGStructurizerPass() {
return new AMDGPUMachineCFGStructurizer();
}

View File

@ -1,4 +1,4 @@
//===-- AMDGPUOpenCLImageTypeLoweringPass.cpp -----------------------------===//
//===- AMDGPUOpenCLImageTypeLoweringPass.cpp ------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@ -22,40 +22,57 @@
/// Resource IDs of read-only images, write-only images and samplers are
/// defined to be their index among the kernel arguments of the same
/// type and access qualifier.
//
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <tuple>
using namespace llvm;
namespace {
static StringRef GetImageSizeFunc = "llvm.OpenCL.image.get.size";
static StringRef GetImageFormatFunc = "llvm.OpenCL.image.get.format";
static StringRef GetImageResourceIDFunc = "llvm.OpenCL.image.get.resource.id";
static StringRef GetSamplerResourceIDFunc =
"llvm.OpenCL.sampler.get.resource.id";
StringRef GetImageSizeFunc = "llvm.OpenCL.image.get.size";
StringRef GetImageFormatFunc = "llvm.OpenCL.image.get.format";
StringRef GetImageResourceIDFunc = "llvm.OpenCL.image.get.resource.id";
StringRef GetSamplerResourceIDFunc = "llvm.OpenCL.sampler.get.resource.id";
static StringRef ImageSizeArgMDType = "__llvm_image_size";
static StringRef ImageFormatArgMDType = "__llvm_image_format";
StringRef ImageSizeArgMDType = "__llvm_image_size";
StringRef ImageFormatArgMDType = "__llvm_image_format";
StringRef KernelsMDNodeName = "opencl.kernels";
StringRef KernelArgMDNodeNames[] = {
static StringRef KernelsMDNodeName = "opencl.kernels";
static StringRef KernelArgMDNodeNames[] = {
"kernel_arg_addr_space",
"kernel_arg_access_qual",
"kernel_arg_type",
"kernel_arg_base_type",
"kernel_arg_type_qual"};
const unsigned NumKernelArgMDNodes = 5;
static const unsigned NumKernelArgMDNodes = 5;
typedef SmallVector<Metadata *, 8> MDVector;
namespace {
using MDVector = SmallVector<Metadata *, 8>;
struct KernelArgMD {
MDVector ArgVector[NumKernelArgMDNodes];
};
@ -303,7 +320,7 @@ class AMDGPUOpenCLImageTypeLoweringPass : public ModulePass {
CloneFunctionInto(NewF, F, VMap, /*ModuleLevelChanges=*/false, Returns);
// Build new MDNode.
SmallVector<llvm::Metadata *, 6> KernelMDArgs;
SmallVector<Metadata *, 6> KernelMDArgs;
KernelMDArgs.push_back(ConstantAsMetadata::get(NewF));
for (unsigned i = 0; i < NumKernelArgMDNodes; ++i)
KernelMDArgs.push_back(MDNode::get(*Context, NewArgMDs.ArgVector[i]));
@ -346,7 +363,7 @@ class AMDGPUOpenCLImageTypeLoweringPass : public ModulePass {
return Modified;
}
public:
public:
AMDGPUOpenCLImageTypeLoweringPass() : ModulePass(ID) {}
bool runOnModule(Module &M) override {
@ -363,10 +380,10 @@ class AMDGPUOpenCLImageTypeLoweringPass : public ModulePass {
}
};
char AMDGPUOpenCLImageTypeLoweringPass::ID = 0;
} // end anonymous namespace
char AMDGPUOpenCLImageTypeLoweringPass::ID = 0;
ModulePass *llvm::createAMDGPUOpenCLImageTypeLoweringPass() {
return new AMDGPUOpenCLImageTypeLoweringPass();
}

View File

@ -1,4 +1,4 @@
//===-- AMDGPURewriteOutArgumentsPass.cpp - Create struct returns ---------===//
//===- AMDGPURewriteOutArgumentsPass.cpp - Create struct returns ----------===//
//
// The LLVM Compiler Infrastructure
//
@ -44,22 +44,39 @@
#include "AMDGPU.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <utility>
#define DEBUG_TYPE "amdgpu-rewrite-out-arguments"
using namespace llvm;
namespace {
static cl::opt<bool> AnyAddressSpace(
"amdgpu-any-address-space-out-arguments",
cl::desc("Replace pointer out arguments with "
@ -78,6 +95,8 @@ STATISTIC(NumOutArgumentsReplaced,
STATISTIC(NumOutArgumentFunctionsReplaced,
"Number of functions with out arguments moved to struct return values");
namespace {
class AMDGPURewriteOutArguments : public FunctionPass {
private:
const DataLayout *DL = nullptr;
@ -89,11 +108,11 @@ private:
#ifndef NDEBUG
bool isVec3ToVec4Shuffle(Type *Ty0, Type* Ty1) const;
#endif
public:
static char ID;
AMDGPURewriteOutArguments() :
FunctionPass(ID) {}
AMDGPURewriteOutArguments() : FunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MemoryDependenceWrapperPass>();
@ -101,10 +120,10 @@ public:
}
bool doInitialization(Module &M) override;
bool runOnFunction(Function &M) override;
bool runOnFunction(Function &F) override;
};
} // End anonymous namespace
} // end anonymous namespace
INITIALIZE_PASS_BEGIN(AMDGPURewriteOutArguments, DEBUG_TYPE,
"AMDGPU Rewrite Out Arguments", false, false)
@ -239,7 +258,8 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
if (OutArgs.empty())
return false;
typedef SmallVector<std::pair<Argument *, Value *>, 4> ReplacementVec;
using ReplacementVec = SmallVector<std::pair<Argument *, Value *>, 4>;
DenseMap<ReturnInst *, ReplacementVec> Replacements;
SmallVector<ReturnInst *, 4> Returns;
@ -373,7 +393,6 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
if (RetVal)
NewRetVal = B.CreateInsertValue(NewRetVal, RetVal, RetIdx++);
for (std::pair<Argument *, Value *> ReturnPoint : Replacement.second) {
Argument *Arg = ReturnPoint.first;
Value *Val = ReturnPoint.second;

View File

@ -1,4 +1,4 @@
//===-- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass ---------===//
//===- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===//
//
// The LLVM Compiler Infrastructure
//
@ -16,15 +16,39 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPUSubtarget.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <limits>
#include <utility>
using namespace llvm;
#define DEBUG_TYPE "AMDGPUtti"
@ -54,7 +78,7 @@ static bool dependsOnLocalPhi(const Loop *L, const Value *Cond,
if (!L->contains(I))
continue;
if (const PHINode *PHI = dyn_cast<PHINode>(V)) {
if (none_of(L->getSubLoops(), [PHI](const Loop* SubLoop) {
if (llvm::none_of(L->getSubLoops(), [PHI](const Loop* SubLoop) {
return SubLoop->contains(PHI); }))
return true;
} else if (Depth < 10 && dependsOnLocalPhi(L, V, Depth+1))
@ -66,7 +90,7 @@ static bool dependsOnLocalPhi(const Loop *L, const Value *Cond,
void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
UP.Threshold = 300; // Twice the default.
UP.MaxCount = UINT_MAX;
UP.MaxCount = std::numeric_limits<unsigned>::max();
UP.Partial = true;
// TODO: Do we want runtime unrolling?
@ -81,12 +105,11 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
const DataLayout &DL = BB->getModule()->getDataLayout();
unsigned LocalGEPsSeen = 0;
if (any_of(L->getSubLoops(), [BB](const Loop* SubLoop) {
if (llvm::any_of(L->getSubLoops(), [BB](const Loop* SubLoop) {
return SubLoop->contains(BB); }))
continue; // Block belongs to an inner loop.
for (const Instruction &I : *BB) {
// Unroll a loop which contains an "if" statement whose condition
// defined by a PHI belonging to the loop. This may help to eliminate
// if region and potentially even PHI itself, saving on both divergence
@ -153,7 +176,7 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
if (!Inst || L->isLoopInvariant(Op))
continue;
if (any_of(L->getSubLoops(), [Inst](const Loop* SubLoop) {
if (llvm::any_of(L->getSubLoops(), [Inst](const Loop* SubLoop) {
return SubLoop->contains(Inst); }))
continue;
HasLoopDef = true;
@ -268,7 +291,6 @@ int AMDGPUTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args ) {
EVT OrigTy = TLI->getValueType(DL, Ty);
if (!OrigTy.isSimple()) {
return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
@ -289,25 +311,23 @@ int AMDGPUTTIImpl::getArithmeticInstrCost(
switch (ISD) {
case ISD::SHL:
case ISD::SRL:
case ISD::SRA: {
case ISD::SRA:
if (SLT == MVT::i64)
return get64BitInstrCost() * LT.first * NElts;
// i32
return getFullRateInstrCost() * LT.first * NElts;
}
case ISD::ADD:
case ISD::SUB:
case ISD::AND:
case ISD::OR:
case ISD::XOR: {
case ISD::XOR:
if (SLT == MVT::i64){
// and, or and xor are typically split into 2 VALU instructions.
return 2 * getFullRateInstrCost() * LT.first * NElts;
}
return LT.first * NElts * getFullRateInstrCost();
}
case ISD::MUL: {
const int QuarterRateCost = getQuarterRateInstrCost();
if (SLT == MVT::i64) {
@ -327,7 +347,6 @@ int AMDGPUTTIImpl::getArithmeticInstrCost(
if (SLT == MVT::f32 || SLT == MVT::f16)
return LT.first * NElts * getFullRateInstrCost();
break;
case ISD::FDIV:
case ISD::FREM:
// FIXME: frem should be handled separately. The fdiv in it is most of it,
@ -348,7 +367,6 @@ int AMDGPUTTIImpl::getArithmeticInstrCost(
int Cost = 7 * getFullRateInstrCost() + 1 * getQuarterRateInstrCost();
return LT.first * NElts * Cost;
}
break;
default:
break;
@ -465,11 +483,9 @@ static bool isArgPassedInSGPR(const Argument *A) {
}
}
///
/// \returns true if the result of the value could potentially be
/// different across workitems in a wavefront.
bool AMDGPUTTIImpl::isSourceOfDivergence(const Value *V) const {
if (const Argument *A = dyn_cast<Argument>(V))
return !isArgPassedInSGPR(A);

View File

@ -1,4 +1,4 @@
//===-- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI -------*- C++ -*-===//
//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@ -6,35 +6,48 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file a TargetTransformInfo::Concept conforming object specific to the
/// AMDGPU target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
///
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
namespace llvm {
class AMDGPUTargetLowering;
class Loop;
class ScalarEvolution;
class Type;
class Value;
class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
typedef BasicTTIImplBase<AMDGPUTTIImpl> BaseT;
typedef TargetTransformInfo TTI;
using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
using TTI = TargetTransformInfo;
friend BaseT;
const AMDGPUSubtarget *ST;
const AMDGPUTargetLowering *TLI;
bool IsGraphicsShader;
const FeatureBitset InlineFeatureIgnoreList = {
// Codegen control options which don't matter.
AMDGPU::FeatureEnableLoadStoreOpt,
@ -63,7 +76,6 @@ class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
const AMDGPUSubtarget *getST() const { return ST; }
const AMDGPUTargetLowering *getTLI() const { return TLI; }
static inline int getFullRateInstrCost() {
return TargetTransformInfo::TCC_Basic;
}
@ -104,7 +116,7 @@ public:
unsigned getHardwareNumberOfRegisters(bool Vector) const;
unsigned getNumberOfRegisters(bool Vector) const;
unsigned getRegisterBitWidth(bool Vector) const ;
unsigned getRegisterBitWidth(bool Vector) const;
unsigned getMinVectorRegisterBitWidth() const;
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
@ -154,4 +166,4 @@ public:
} // end namespace llvm
#endif
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

View File

@ -1,11 +1,10 @@
//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
//===- AMDILCFGStructurizer.cpp - CFG Structurizer ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
//==-----------------------------------------------------------------------===//
#include "AMDGPU.h"
@ -67,7 +66,7 @@ STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
namespace llvm {
void initializeAMDGPUCFGStructurizerPass(PassRegistry&);
void initializeAMDGPUCFGStructurizerPass(PassRegistry &);
} // end namespace llvm
@ -121,9 +120,9 @@ public:
class AMDGPUCFGStructurizer : public MachineFunctionPass {
public:
typedef SmallVector<MachineBasicBlock *, 32> MBBVector;
typedef std::map<MachineBasicBlock *, BlockInformation *> MBBInfoMap;
typedef std::map<MachineLoop *, MachineBasicBlock *> LoopLandInfoMap;
using MBBVector = SmallVector<MachineBasicBlock *, 32>;
using MBBInfoMap = std::map<MachineBasicBlock *, BlockInformation *>;
using LoopLandInfoMap = std::map<MachineLoop *, MachineBasicBlock *>;
enum PathToKind {
Not_SinglePath = 0,
@ -234,6 +233,7 @@ protected:
void insertCondBranchBefore(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I, int NewOpcode,
int RegNum, const DebugLoc &DL);
static int getBranchNzeroOpcode(int OldOpcode);
static int getBranchZeroOpcode(int OldOpcode);
static int getContinueNzeroOpcode(int OldOpcode);
@ -246,21 +246,25 @@ protected:
static bool isUncondBranch(MachineInstr *MI);
static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB);
static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB);
/// The correct naming for this is getPossibleLoopendBlockBranchInstr.
///
/// BB with backward-edge could have move instructions after the branch
/// instruction. Such move instruction "belong to" the loop backward-edge.
MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB);
static MachineInstr *getReturnInstr(MachineBasicBlock *MBB);
static bool isReturnBlock(MachineBasicBlock *MBB);
static void cloneSuccessorList(MachineBasicBlock *DstMBB,
MachineBasicBlock *SrcMBB) ;
MachineBasicBlock *SrcMBB);
static MachineBasicBlock *clone(MachineBasicBlock *MBB);
/// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose
/// because the AMDGPU instruction is not recognized as terminator fix this
/// and retire this routine
void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB,
MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk);
static void wrapup(MachineBasicBlock *MBB);
int patternMatch(MachineBasicBlock *MBB);
@ -299,6 +303,7 @@ protected:
MachineBasicBlock *LandMBB);
void settleLoopcontBlock(MachineBasicBlock *ContingMBB,
MachineBasicBlock *ContMBB);
/// normalizeInfiniteLoopExit change
/// B1:
/// uncond_br LoopHeader
@ -309,6 +314,7 @@ protected:
/// and return the newly added dummy exit block
MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep);
void removeUnconditionalBranch(MachineBasicBlock *MBB);
/// Remove duplicate branches instructions in a block.
/// For instance
/// B0:
@ -318,6 +324,7 @@ protected:
/// B0:
/// cond_br X B1 B2
void removeRedundantConditionalBranch(MachineBasicBlock *MBB);
void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB);
void removeSuccessor(MachineBasicBlock *MBB);
MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB,
@ -335,10 +342,10 @@ private:
SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks;
};
char AMDGPUCFGStructurizer::ID = 0;
} // end anonymous namespace
char AMDGPUCFGStructurizer::ID = 0;
int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
if (It == BlockInfoMap.end())
@ -535,7 +542,7 @@ int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
case AMDGPU::JUMP_COND:
case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
default: llvm_unreachable("internal error");
};
}
return -1;
}
@ -1597,7 +1604,7 @@ void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB);
if (!BranchMI) {
DEBUG(
dbgs() << "migrateInstruction don't see branch instr\n" ;
dbgs() << "migrateInstruction don't see branch instr\n";
);
SpliceEnd = SrcMBB->end();
} else {

View File

@ -1,4 +1,4 @@
//===-- AMDGPUKernelCodeT.h - Print AMDGPU assembly code ---------*- C++ -*-===//
//===- AMDGPUKernelCodeT.h - Print AMDGPU assembly code ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@ -6,39 +6,33 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file AMDKernelCodeT.h
//===----------------------------------------------------------------------===//
#ifndef AMDKERNELCODET_H
#define AMDKERNELCODET_H
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDKERNELCODET_H
#define LLVM_LIB_TARGET_AMDGPU_AMDKERNELCODET_H
#include "llvm/MC/SubtargetFeature.h"
#include <cstddef>
#include <cstdint>
#include "llvm/Support/Debug.h"
//---------------------------------------------------------------------------//
// AMD Kernel Code, and its dependencies //
//---------------------------------------------------------------------------//
typedef uint8_t hsa_powertwo8_t;
typedef uint32_t hsa_ext_code_kind_t;
typedef uint8_t hsa_ext_brig_profile8_t;
typedef uint8_t hsa_ext_brig_machine_model8_t;
typedef uint64_t hsa_ext_control_directive_present64_t;
typedef uint16_t hsa_ext_exception_kind16_t;
typedef uint32_t hsa_ext_code_kind32_t;
using hsa_powertwo8_t = uint8_t;
using hsa_ext_code_kind_t = uint32_t;
using hsa_ext_brig_profile8_t = uint8_t;
using hsa_ext_brig_machine_model8_t = uint8_t;
using hsa_ext_control_directive_present64_t = uint64_t;
using hsa_ext_exception_kind16_t = uint16_t;
using hsa_ext_code_kind32_t = uint32_t;
typedef struct hsa_dim3_s {
using hsa_dim3_t = struct {
uint32_t x;
uint32_t y;
uint32_t z;
} hsa_dim3_t;
};
/// The version of the amd_*_code_t struct. Minor versions must be
/// backward compatible.
typedef uint32_t amd_code_version32_t;
using amd_code_version32_t = uint32_t;
enum amd_code_version_t {
AMD_CODE_VERSION_MAJOR = 0,
AMD_CODE_VERSION_MINOR = 1
@ -64,7 +58,7 @@ enum amd_element_byte_size_t {
/// Shader program settings for CS. Contains COMPUTE_PGM_RSRC1 and
/// COMPUTE_PGM_RSRC2 registers.
typedef uint64_t amd_compute_pgm_resource_register64_t;
using amd_compute_pgm_resource_register64_t = uint64_t;
/// Every amd_*_code_t has the following properties, which are composed of
/// a number of bit fields. Every bit field has a mask (AMD_CODE_PROPERTY_*),
@ -74,9 +68,8 @@ typedef uint64_t amd_compute_pgm_resource_register64_t;
/// (Note that bit fields cannot be used as their layout is
/// implementation defined in the C standard and so cannot be used to
/// specify an ABI)
typedef uint32_t amd_code_property32_t;
using amd_code_property32_t = uint32_t;
enum amd_code_property_mask_t {
/// Enable the setup of the SGPR user data registers
/// (AMD_CODE_PROPERTY_ENABLE_SGPR_*), see documentation of amd_kernel_code_t
/// for initial register state.
@ -207,7 +200,7 @@ enum amd_code_property_mask_t {
/// directives. See the definition of the control directives in HSA Programmer's
/// Reference Manual which also defines how the values specified as finalizer
/// arguments have to agree with the control directives in the HSAIL code.
typedef struct hsa_ext_control_directives_s {
using hsa_ext_control_directives_t = struct {
/// This is a bit set indicating which control directives have been
/// specified. If the value is 0 then there are no control directives specified
/// and the rest of the fields can be ignored. The bits are accessed using the
@ -312,7 +305,7 @@ typedef struct hsa_ext_control_directives_s {
/// Reserved. Must be 0.
uint8_t reserved[75];
} hsa_ext_control_directives_t;
};
/// AMD Kernel Code Object (amd_kernel_code_t). GPU CP uses the AMD Kernel
/// Code Object to set up the hardware to execute the kernel dispatch.
@ -522,9 +515,8 @@ typedef struct hsa_ext_control_directives_s {
/// dispatch packet kernArgPtr to a kernarg segment address before using this V#.
/// Alternatively scalar loads can be used if the kernarg offset is uniform, as
/// the kernarg segment is constant for the duration of the kernel execution.
///
typedef struct amd_kernel_code_s {
using amd_kernel_code_t = struct {
uint32_t amd_kernel_code_version_major;
uint32_t amd_kernel_code_version_minor;
uint16_t amd_machine_kind;
@ -653,6 +645,6 @@ typedef struct amd_kernel_code_s {
uint8_t reserved3[12];
uint64_t runtime_loader_kernel_symbol;
uint64_t control_directives[16];
} amd_kernel_code_t;
};
#endif // AMDKERNELCODET_H
#endif // LLVM_LIB_TARGET_AMDGPU_AMDKERNELCODET_H

View File

@ -1,4 +1,4 @@
//===----------------------- GCNMinRegStrategy.cpp - ----------------------===//
//===- GCNMinRegStrategy.cpp ----------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@ -6,18 +6,27 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/simple_ilist.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <limits>
#include <vector>
using namespace llvm;
#define DEBUG_TYPE "machine-scheduler"
namespace {
class GCNMinRegScheduler {
struct Candidate : ilist_node<Candidate> {
const SUnit *SU;
@ -28,7 +37,7 @@ class GCNMinRegScheduler {
};
SpecificBumpPtrAllocator<Candidate> Alloc;
typedef simple_ilist<Candidate> Queue;
using Queue = simple_ilist<Candidate>;
Queue RQ; // Ready queue
std::vector<unsigned> NumPreds;
@ -72,7 +81,8 @@ public:
std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots,
const ScheduleDAG &DAG);
};
} // namespace
} // end anonymous namespace
void GCNMinRegScheduler::initNumPreds(const decltype(ScheduleDAG::SUnits) &SUnits) {
NumPreds.resize(SUnits.size());
@ -104,7 +114,9 @@ int GCNMinRegScheduler::getNotReadySuccessors(const SUnit *SU) const {
template <typename Calc>
unsigned GCNMinRegScheduler::findMax(unsigned Num, Calc C) {
assert(!RQ.empty() && Num <= RQ.size());
typedef decltype(C(*RQ.begin())) T;
using T = decltype(C(*RQ.begin())) ;
T Max = std::numeric_limits<T>::min();
unsigned NumMax = 0;
for (auto I = RQ.begin(); Num; --Num) {
@ -260,9 +272,11 @@ GCNMinRegScheduler::schedule(ArrayRef<const SUnit*> TopRoots,
}
namespace llvm {
/// Entry point exposed to the rest of the AMDGPU backend: constructs a
/// GCNMinRegScheduler and delegates to its schedule() method, returning the
/// resulting instruction order as a vector of SUnits.
/// \p TopRoots are the ready-at-top SUnits the scheduler starts from;
/// \p DAG is the scheduling DAG whose SUnits are ordered.
std::vector<const SUnit*> makeMinRegSchedule(ArrayRef<const SUnit*> TopRoots,
const ScheduleDAG &DAG) {
GCNMinRegScheduler S;
return S.schedule(TopRoots, DAG);
}
}
// NOTE(review): the two closing braces above appear to be a removed/added
// line pair from the rendered diff (old bare `}` vs new commented close);
// the live file should contain only the commented one below.
} // end namespace llvm