1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 19:12:56 +02:00

ExecutionDepsFix: Let targets specialize the pass; NFC

Let targets specialize the pass with the register class so we can get a
parameterless default constructor and can put the pass into the pass
registry to enable testing with -run-pass=.

llvm-svn: 298184
This commit is contained in:
Matthias Braun 2017-03-18 05:08:58 +00:00
parent ea385b5555
commit ea69eb48b2
5 changed files with 261 additions and 221 deletions

View File

@ -0,0 +1,221 @@
//===- llvm/CodeGen/ExecutionDepsFix.h - Execution Dependency Fix -*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file Execution Dependency Fix pass.
///
/// Some X86 SSE instructions like mov, and, or, xor are available in different
/// variants for different operand types. These variant instructions are
/// equivalent, but on Nehalem and newer cpus there is extra latency
/// transferring data between integer and floating point domains. ARM cores
/// have similar issues when they are configured with both VFP and NEON
/// pipelines.
///
/// This pass changes the variant instructions to minimize domain crossings.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CODEGEN_EXECUTIONDEPSFIX_H
#define LLVM_CODEGEN_EXECUTIONDEPSFIX_H
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/Support/Allocator.h"
#include <cassert>
#include <limits>
#include <vector>
namespace llvm {
/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track
/// of execution domains.
///
/// An open DomainValue represents a set of instructions that can still switch
/// execution domain. Multiple registers may refer to the same open
/// DomainValue - they will eventually be collapsed to the same execution
/// domain.
///
/// A collapsed DomainValue represents a single register that has been forced
/// into one or more execution domains. There is a separate collapsed
/// DomainValue for each register, but it may contain multiple execution
/// domains. A register value is initially created in a single execution
/// domain, but if we were forced to pay the penalty of a domain crossing, we
/// keep track of the fact that the register is now available in multiple
/// domains.
struct DomainValue {
  // Basic reference counting.
  unsigned Refs;

  // Bitmask of available domains. For an open DomainValue, it is the still
  // possible domains for collapsing. For a collapsed DomainValue it is the
  // domains where the register is available for free.
  unsigned AvailableDomains;

  // Pointer to the next DomainValue in a chain. When two DomainValues are
  // merged, Victim.Next is set to point to Victor, so old DomainValue
  // references can be updated by following the chain.
  DomainValue *Next;

  // Twiddleable instructions using or defining these registers.
  SmallVector<MachineInstr*, 8> Instrs;

  // A collapsed DomainValue has no instructions to twiddle - it simply keeps
  // track of the domains where the registers are already available.
  bool isCollapsed() const { return Instrs.empty(); }

  // Is domain available?
  bool hasDomain(unsigned domain) const {
    return (AvailableDomains & domainBit(domain)) != 0;
  }

  // Mark domain as available.
  void addDomain(unsigned domain) {
    AvailableDomains |= domainBit(domain);
  }

  // Restrict to a single domain available.
  void setSingleDomain(unsigned domain) {
    AvailableDomains = domainBit(domain);
  }

  // Return bitmask of domains that are available and in mask.
  unsigned getCommonDomains(unsigned mask) const {
    return AvailableDomains & mask;
  }

  // First domain available, i.e. the index of the lowest set bit.
  // NOTE(review): the result for an empty mask depends on the zero behavior of
  // countTrailingZeros - confirm callers only ask when a domain is available.
  unsigned getFirstDomain() const {
    return countTrailingZeros(AvailableDomains);
  }

  DomainValue() : Refs(0) { clear(); }

  // Reset to the empty state: no available domains, no chain successor and no
  // instructions. The reference count is deliberately left untouched.
  void clear() {
    AvailableDomains = 0;
    Next = nullptr;
    Instrs.clear();
  }

private:
  // Single-bit mask for a domain index. Shifting by the bit width of unsigned
  // or more is undefined behavior, so every mask computation goes through this
  // one checked helper rather than only hasDomain() being guarded.
  static unsigned domainBit(unsigned domain) {
    assert(domain <
               static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
           "undefined behavior");
    return 1u << domain;
  }
};
/// Information about a live register.
///
/// The struct declares no constructor and no default member initializers, so
/// whoever creates a LiveReg is responsible for setting both fields.
struct LiveReg {
/// Value currently in this register, or NULL when no value is being tracked.
/// This counts as a DomainValue reference.
DomainValue *Value;
/// Instruction that defined this register, relative to the beginning of the
/// current basic block. When a LiveReg is used to represent a live-out
/// register, this value is relative to the end of the basic block, so it
/// will be a negative number.
int Def;
};
/// Target-independent base class of the execution dependency fix pass.
///
/// The pass ID and the register class to examine are constructor arguments, so
/// a target derives from this class and supplies both from its own
/// parameterless constructor.
class ExecutionDepsFix : public MachineFunctionPass {
// Allocator backing the DomainValue objects used by this pass.
SpecificBumpPtrAllocator<DomainValue> Allocator;
// NOTE(review): presumably a pool of DomainValues ready for reuse by alloc() -
// confirm against the implementation file.
SmallVector<DomainValue*,16> Avail;
// Register class given to the constructor; the pointer never changes.
const TargetRegisterClass *const RC;
// NOTE(review): MF, TII, TRI, LiveRegs and CurInstr are not initialized by the
// constructor below; presumably runOnMachineFunction sets them before use -
// confirm.
MachineFunction *MF;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
RegisterClassInfo RegClassInfo;
// NOTE(review): presumably maps a target register number to the indices
// returned by regIndices() - confirm.
std::vector<SmallVector<int, 1>> AliasMap;
// Number of registers in RC, captured once at construction.
const unsigned NumRegs;
LiveReg *LiveRegs;
// Per-basic-block bookkeeping.
struct MBBInfo {
// Keeps clearance and domain information for all registers. Note that this
// is different from the usual notion of liveness. The CPU doesn't care
// whether or not we consider a register killed.
LiveReg *OutRegs;
// Whether we have gotten to this block in primary processing yet.
bool PrimaryCompleted;
// The number of predecessors for which primary processing has completed
unsigned IncomingProcessed;
// The value of `IncomingProcessed` at the start of primary processing
unsigned PrimaryIncoming;
// The number of predecessors for which all processing steps are done.
unsigned IncomingCompleted;
// Starts with no out-registers, nothing completed and all counters at zero.
MBBInfo()
: OutRegs(nullptr), PrimaryCompleted(false), IncomingProcessed(0),
PrimaryIncoming(0), IncomingCompleted(0) {}
};
typedef DenseMap<MachineBasicBlock *, MBBInfo> MBBInfoMap;
MBBInfoMap MBBInfos;
/// List of undefined register reads in this block in forward order.
std::vector<std::pair<MachineInstr*, unsigned> > UndefReads;
/// Storage for register unit liveness.
LivePhysRegs LiveRegSet;
/// Current instruction number.
/// The first instruction in each basic block is 0.
int CurInstr;
public:
/// \param PassID pass identification object of the concrete (derived) pass,
///        forwarded to MachineFunctionPass.
/// \param RC register class to work on; its address is stored and its
///        register count is cached in NumRegs.
ExecutionDepsFix(char &PassID, const TargetRegisterClass &RC)
: MachineFunctionPass(PassID), RC(&RC), NumRegs(RC.getNumRegs()) {}
// Marks every analysis as preserved, then defers to the base class.
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
bool runOnMachineFunction(MachineFunction &MF) override;
// The pass requires the NoVRegs machine function property.
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::NoVRegs);
}
private:
iterator_range<SmallVectorImpl<int>::const_iterator>
regIndices(unsigned Reg) const;
// DomainValue allocation.
DomainValue *alloc(int domain = -1);
// Adds one reference to DV (a null DV is passed through unchanged) and
// returns it.
DomainValue *retain(DomainValue *DV) {
if (DV) ++DV->Refs;
return DV;
}
void release(DomainValue*);
DomainValue *resolve(DomainValue*&);
// LiveRegs manipulations.
void setLiveReg(int rx, DomainValue *DV);
void kill(int rx);
void force(int rx, unsigned domain);
void collapse(DomainValue *dv, unsigned domain);
bool merge(DomainValue *A, DomainValue *B);
// Basic block traversal.
void enterBasicBlock(MachineBasicBlock*);
void leaveBasicBlock(MachineBasicBlock*);
bool isBlockDone(MachineBasicBlock *);
void processBasicBlock(MachineBasicBlock *MBB, bool PrimaryPass);
void updateSuccessors(MachineBasicBlock *MBB, bool PrimaryPass);
// Per-instruction processing.
bool visitInstr(MachineInstr *);
void processDefs(MachineInstr *, bool breakDependency, bool Kill);
void visitSoftInstr(MachineInstr*, unsigned mask);
void visitHardInstr(MachineInstr*, unsigned domain);
void pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
unsigned Pref);
bool shouldBreakDependence(MachineInstr*, unsigned OpIdx, unsigned Pref);
void processUndefReads(MachineBasicBlock*);
};
} // end namespace llvm
#endif

View File

@ -323,12 +323,6 @@ namespace llvm {
/// ExpandISelPseudos - This pass expands pseudo-instructions.
extern char &ExpandISelPseudosID;
/// This pass fixes execution time problems with dependent instructions, such
/// as switching execution domains to match.
///
/// The pass will examine instructions using and defining registers in RC.
FunctionPass *createExecutionDepsFixPass(const TargetRegisterClass *RC);
/// UnpackMachineBundles - This pass unpack machine instruction bundles.
extern char &UnpackMachineBundlesID;

View File

@ -6,21 +6,9 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the execution dependency fix pass.
//
// Some X86 SSE instructions like mov, and, or, xor are available in different
// variants for different operand types. These variant instructions are
// equivalent, but on Nehalem and newer cpus there is extra latency
// transferring data between integer and floating point domains. ARM cores
// have similar issues when they are configured with both VFP and NEON
// pipelines.
//
// This pass changes the variant instructions to minimize domain crossings.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/ExecutionDepsFix.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/LivePhysRegs.h"
@ -37,200 +25,6 @@ using namespace llvm;
#define DEBUG_TYPE "execution-deps-fix"
/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track
/// of execution domains.
///
/// An open DomainValue represents a set of instructions that can still switch
/// execution domain. Multiple registers may refer to the same open
/// DomainValue - they will eventually be collapsed to the same execution
/// domain.
///
/// A collapsed DomainValue represents a single register that has been forced
/// into one or more execution domains. There is a separate collapsed
/// DomainValue for each register, but it may contain multiple execution
/// domains. A register value is initially created in a single execution
/// domain, but if we were forced to pay the penalty of a domain crossing, we
/// keep track of the fact that the register is now available in multiple
/// domains.
namespace {
struct DomainValue {
// Basic reference counting.
unsigned Refs;
// Bitmask of available domains. For an open DomainValue, it is the still
// possible domains for collapsing. For a collapsed DomainValue it is the
// domains where the register is available for free.
unsigned AvailableDomains;
// Pointer to the next DomainValue in a chain. When two DomainValues are
// merged, Victim.Next is set to point to Victor, so old DomainValue
// references can be updated by following the chain.
DomainValue *Next;
// Twiddleable instructions using or defining these registers.
SmallVector<MachineInstr*, 8> Instrs;
// A collapsed DomainValue has no instructions to twiddle - it simply keeps
// track of the domains where the registers are already available.
bool isCollapsed() const { return Instrs.empty(); }
// Is domain available?
// The assert guards the shift below: shifting by the bit width of unsigned or
// more is undefined behavior.
bool hasDomain(unsigned domain) const {
assert(domain <
static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
"undefined behavior");
return AvailableDomains & (1u << domain);
}
// Mark domain as available.
// NOTE(review): unlike hasDomain(), the shift here is not range-checked.
void addDomain(unsigned domain) {
AvailableDomains |= 1u << domain;
}
// Restrict to a single domain available.
// NOTE(review): unlike hasDomain(), the shift here is not range-checked.
void setSingleDomain(unsigned domain) {
AvailableDomains = 1u << domain;
}
// Return bitmask of domains that are available and in mask.
unsigned getCommonDomains(unsigned mask) const {
return AvailableDomains & mask;
}
// First domain available, i.e. the index of the lowest set bit.
unsigned getFirstDomain() const {
return countTrailingZeros(AvailableDomains);
}
DomainValue() : Refs(0) { clear(); }
// Reset to the empty state: no available domains, no chain successor and no
// instructions. The reference count is left untouched.
void clear() {
AvailableDomains = 0;
Next = nullptr;
Instrs.clear();
}
};
}
namespace {
/// Information about a live register.
///
/// The struct declares no constructor and no default member initializers, so
/// whoever creates a LiveReg is responsible for setting both fields.
struct LiveReg {
/// Value currently in this register, or NULL when no value is being tracked.
/// This counts as a DomainValue reference.
DomainValue *Value;
/// Instruction that defined this register, relative to the beginning of the
/// current basic block. When a LiveReg is used to represent a live-out
/// register, this value is relative to the end of the basic block, so it
/// will be a negative number.
int Def;
};
} // anonymous namespace
namespace {
/// The execution dependency fix pass, file-local in this version: it owns its
/// pass ID and receives the register class to examine through its constructor.
class ExecutionDepsFix : public MachineFunctionPass {
// Pass identification object, defined after the class.
static char ID;
// Allocator backing the DomainValue objects used by this pass.
SpecificBumpPtrAllocator<DomainValue> Allocator;
// NOTE(review): presumably a pool of DomainValues ready for reuse by alloc() -
// confirm against the member definitions.
SmallVector<DomainValue*,16> Avail;
// Register class given to the constructor; the pointer never changes.
const TargetRegisterClass *const RC;
// NOTE(review): MF, TII, TRI, LiveRegs and CurInstr are not initialized by the
// constructor below; presumably runOnMachineFunction sets them before use -
// confirm.
MachineFunction *MF;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
RegisterClassInfo RegClassInfo;
// NOTE(review): presumably maps a target register number to the indices
// returned by regIndices() - confirm.
std::vector<SmallVector<int, 1>> AliasMap;
// Number of registers in RC, captured once at construction.
const unsigned NumRegs;
LiveReg *LiveRegs;
// Per-basic-block bookkeeping.
struct MBBInfo {
// Keeps clearance and domain information for all registers. Note that this
// is different from the usual notion of liveness. The CPU doesn't care
// whether or not we consider a register killed.
LiveReg *OutRegs;
// Whether we have gotten to this block in primary processing yet.
bool PrimaryCompleted;
// The number of predecessors for which primary processing has completed
unsigned IncomingProcessed;
// The value of `IncomingProcessed` at the start of primary processing
unsigned PrimaryIncoming;
// The number of predecessors for which all processing steps are done.
unsigned IncomingCompleted;
// Starts with no out-registers, nothing completed and all counters at zero.
MBBInfo()
: OutRegs(nullptr), PrimaryCompleted(false), IncomingProcessed(0),
PrimaryIncoming(0), IncomingCompleted(0) {}
};
typedef DenseMap<MachineBasicBlock *, MBBInfo> MBBInfoMap;
MBBInfoMap MBBInfos;
/// List of undefined register reads in this block in forward order.
std::vector<std::pair<MachineInstr*, unsigned> > UndefReads;
/// Storage for register unit liveness.
LivePhysRegs LiveRegSet;
/// Current instruction number.
/// The first instruction in each basic block is 0.
int CurInstr;
public:
/// \param rc register class to work on; the pointer is stored and
///        dereferenced immediately to cache the register count, so it must
///        not be null.
ExecutionDepsFix(const TargetRegisterClass *rc)
: MachineFunctionPass(ID), RC(rc), NumRegs(RC->getNumRegs()) {}
// Marks every analysis as preserved, then defers to the base class.
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
bool runOnMachineFunction(MachineFunction &MF) override;
// The pass requires the NoVRegs machine function property.
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::NoVRegs);
}
StringRef getPassName() const override { return "Execution dependency fix"; }
private:
iterator_range<SmallVectorImpl<int>::const_iterator>
regIndices(unsigned Reg) const;
// DomainValue allocation.
DomainValue *alloc(int domain = -1);
// Adds one reference to DV (a null DV is passed through unchanged) and
// returns it.
DomainValue *retain(DomainValue *DV) {
if (DV) ++DV->Refs;
return DV;
}
void release(DomainValue*);
DomainValue *resolve(DomainValue*&);
// LiveRegs manipulations.
void setLiveReg(int rx, DomainValue *DV);
void kill(int rx);
void force(int rx, unsigned domain);
void collapse(DomainValue *dv, unsigned domain);
bool merge(DomainValue *A, DomainValue *B);
// Basic block traversal.
void enterBasicBlock(MachineBasicBlock*);
void leaveBasicBlock(MachineBasicBlock*);
bool isBlockDone(MachineBasicBlock *);
void processBasicBlock(MachineBasicBlock *MBB, bool PrimaryPass);
void updateSuccessors(MachineBasicBlock *MBB, bool PrimaryPass);
// Per-instruction processing.
bool visitInstr(MachineInstr *);
void processDefs(MachineInstr *, bool breakDependency, bool Kill);
void visitSoftInstr(MachineInstr*, unsigned mask);
void visitHardInstr(MachineInstr*, unsigned domain);
void pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
unsigned Pref);
bool shouldBreakDependence(MachineInstr*, unsigned OpIdx, unsigned Pref);
void processUndefReads(MachineBasicBlock*);
};
}
// Storage for the pass ID declared inside the class.
char ExecutionDepsFix::ID = 0;
/// Translate TRI register number to a list of indices into our smaller tables
/// of interesting registers.
iterator_range<SmallVectorImpl<int>::const_iterator>
@ -951,7 +745,3 @@ bool ExecutionDepsFix::runOnMachineFunction(MachineFunction &mf) {
return false;
}
/// Factory for the execution dependency fix pass working on the registers of
/// \p RC. Returns a newly allocated pass object.
FunctionPass *llvm::createExecutionDepsFixPass(const TargetRegisterClass *RC) {
  ExecutionDepsFix *NewPass = new ExecutionDepsFix(RC);
  return NewPass;
}

View File

@ -25,6 +25,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/ExecutionDepsFix.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
@ -76,6 +77,10 @@ static cl::opt<cl::boolOrDefault>
EnableGlobalMerge("arm-global-merge", cl::Hidden,
cl::desc("Enable the global merge pass"));
// Forward declaration of the registration function for ARMExecutionDepsFix,
// which LLVMInitializeARMTarget calls.
namespace llvm {
void initializeARMExecutionDepsFixPass(PassRegistry&);
}
extern "C" void LLVMInitializeARMTarget() {
// Register the target.
RegisterTargetMachine<ARMLETargetMachine> X(getTheARMLETarget());
@ -88,6 +93,7 @@ extern "C" void LLVMInitializeARMTarget() {
initializeARMLoadStoreOptPass(Registry);
initializeARMPreAllocLoadStoreOptPass(Registry);
initializeARMConstantIslandsPass(Registry);
initializeARMExecutionDepsFixPass(Registry);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@ -441,8 +447,21 @@ public:
void addPreEmitPass() override;
};
/// ARM flavour of the execution dependency fix pass. It binds the generic
/// ExecutionDepsFix to the ARM::DPRRegClass register class and to its own pass
/// ID, which is what makes a parameterless constructor possible.
class ARMExecutionDepsFix : public ExecutionDepsFix {
public:
  /// Pass identification object, handed to the base class by reference.
  static char ID;

  ARMExecutionDepsFix() : ExecutionDepsFix(ID, ARM::DPRRegClass) {}

  /// Human-readable name of this pass.
  StringRef getPassName() const override {
    return StringRef("ARM Execution Dependency Fix");
  }
};

char ARMExecutionDepsFix::ID = 0;
} // end anonymous namespace
// Defines initializeARMExecutionDepsFixPass(), which registers the pass under
// the command-line name "arm-execution-deps-fix" so it can be selected by name
// (e.g. with -run-pass=). The two trailing flags are the macro's CFG-only and
// is-analysis arguments; both are false here.
INITIALIZE_PASS(ARMExecutionDepsFix, "arm-execution-deps-fix",
"ARM Execution Dependency Fix", false, false)
/// Builds the ARM pass configuration for this target machine and the given
/// pass manager. Returns a newly allocated ARMPassConfig.
TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) {
  ARMPassConfig *Config = new ARMPassConfig(this, PM);
  return Config;
}
@ -536,7 +555,7 @@ void ARMPassConfig::addPreSched2() {
if (EnableARMLoadStoreOpt)
addPass(createARMLoadStoreOptimizationPass());
addPass(createExecutionDepsFixPass(&ARM::DPRRegClass));
addPass(new ARMExecutionDepsFix());
}
// Expand some pseudo instructions into multiple instructions to allow

View File

@ -30,12 +30,13 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/ExecutionDepsFix.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@ -61,6 +62,7 @@ static cl::opt<bool> EnableMachineCombinerPass("x86-machine-combiner",
// Forward declarations of pass registration functions called from
// LLVMInitializeX86Target.
namespace llvm {
void initializeWinEHStatePassPass(PassRegistry &);
void initializeX86ExecutionDepsFixPass(PassRegistry &);
} // end namespace llvm
@ -74,6 +76,7 @@ extern "C" void LLVMInitializeX86Target() {
initializeWinEHStatePassPass(PR);
initializeFixupBWInstPassPass(PR);
initializeEvexToVexInstPassPass(PR);
initializeX86ExecutionDepsFixPass(PR);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@ -349,8 +352,21 @@ public:
void addPreSched2() override;
};
/// X86 flavour of the execution dependency fix pass. It binds the generic
/// ExecutionDepsFix to the X86::VR128XRegClass register class and to its own
/// pass ID, which is what makes a parameterless constructor possible.
class X86ExecutionDepsFix : public ExecutionDepsFix {
public:
  /// Pass identification object, handed to the base class by reference.
  static char ID;

  X86ExecutionDepsFix() : ExecutionDepsFix(ID, X86::VR128XRegClass) {}

  /// Human-readable name of this pass.
  StringRef getPassName() const override {
    return StringRef("X86 Execution Dependency Fix");
  }
};

char X86ExecutionDepsFix::ID = 0;
} // end anonymous namespace
// Defines initializeX86ExecutionDepsFixPass(), which registers the pass under
// the command-line name "x86-execution-deps-fix" so it can be selected by name
// (e.g. with -run-pass=). The two trailing flags are the macro's CFG-only and
// is-analysis arguments; both are false here.
// No trailing semicolon: the macro expands to complete function definitions,
// so a ';' after it is a stray empty declaration that triggers
// extra-semicolon warnings.
INITIALIZE_PASS(X86ExecutionDepsFix, "x86-execution-deps-fix",
                "X86 Execution Dependency Fix", false, false)
/// Builds the X86 pass configuration for this target machine and the given
/// pass manager. Returns a newly allocated X86PassConfig.
TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) {
  X86PassConfig *Config = new X86PassConfig(this, PM);
  return Config;
}
@ -432,7 +448,7 @@ void X86PassConfig::addPreSched2() { addPass(createX86ExpandPseudoPass()); }
void X86PassConfig::addPreEmitPass() {
if (getOptLevel() != CodeGenOpt::None)
addPass(createExecutionDepsFixPass(&X86::VR128XRegClass));
addPass(new X86ExecutionDepsFix());
if (UseVZeroUpper)
addPass(createX86IssueVZeroUpperPass());