mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
Revert "[MachinePipeliner] Improve the TargetInstrInfo API analyzeLoop/reduceLoopCount"
This commit broke the ASan buildbot. See comments in rL372376 for more information. This reverts commit 15e27b0b6d9d51362fad85dbe95ac5b3fadf0a06. llvm-svn: 372425
This commit is contained in:
parent
e4d9769ce4
commit
1a7a7c7655
@ -62,7 +62,6 @@
|
|||||||
|
|
||||||
#include "llvm/CodeGen/MachineFunction.h"
|
#include "llvm/CodeGen/MachineFunction.h"
|
||||||
#include "llvm/CodeGen/MachineLoopInfo.h"
|
#include "llvm/CodeGen/MachineLoopInfo.h"
|
||||||
#include "llvm/CodeGen/TargetInstrInfo.h"
|
|
||||||
#include "llvm/CodeGen/TargetSubtargetInfo.h"
|
#include "llvm/CodeGen/TargetSubtargetInfo.h"
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
@ -169,7 +168,6 @@ private:
|
|||||||
MachineBasicBlock *BB;
|
MachineBasicBlock *BB;
|
||||||
MachineBasicBlock *Preheader;
|
MachineBasicBlock *Preheader;
|
||||||
MachineBasicBlock *NewKernel = nullptr;
|
MachineBasicBlock *NewKernel = nullptr;
|
||||||
std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopInfo;
|
|
||||||
|
|
||||||
/// Map for each register and the max difference between its uses and def.
|
/// Map for each register and the max difference between its uses and def.
|
||||||
/// The first element in the pair is the max difference in stages. The
|
/// The first element in the pair is the max difference in stages. The
|
||||||
|
@ -662,50 +662,6 @@ public:
|
|||||||
BytesAdded);
|
BytesAdded);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Object returned by analyzeLoopForPipelining. Allows software pipelining
|
|
||||||
/// implementations to query attributes of the loop being pipelined and to
|
|
||||||
/// apply target-specific updates to the loop once pipelining is complete.
|
|
||||||
class PipelinerLoopInfo {
|
|
||||||
public:
|
|
||||||
virtual ~PipelinerLoopInfo();
|
|
||||||
/// Return true if the given instruction should not be pipelined and should
|
|
||||||
/// be ignored. An example could be a loop comparison, or induction variable
|
|
||||||
/// update with no users being pipelined.
|
|
||||||
virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const = 0;
|
|
||||||
|
|
||||||
/// Create a condition to determine if the trip count of the loop is greater
|
|
||||||
/// than TC.
|
|
||||||
///
|
|
||||||
/// If the trip count is statically known to be greater than TC, return
|
|
||||||
/// true. If the trip count is statically known to be not greater than TC,
|
|
||||||
/// return false. Otherwise return nullopt and fill out Cond with the test
|
|
||||||
/// condition.
|
|
||||||
virtual Optional<bool>
|
|
||||||
createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB,
|
|
||||||
SmallVectorImpl<MachineOperand> &Cond) = 0;
|
|
||||||
|
|
||||||
/// Modify the loop such that the trip count is
|
|
||||||
/// OriginalTC + TripCountAdjust.
|
|
||||||
virtual void adjustTripCount(int TripCountAdjust) = 0;
|
|
||||||
|
|
||||||
/// Called when the loop's preheader has been modified to NewPreheader.
|
|
||||||
virtual void setPreheader(MachineBasicBlock *NewPreheader) = 0;
|
|
||||||
|
|
||||||
/// Called when the loop is being removed. Any instructions in the preheader
|
|
||||||
/// should be removed.
|
|
||||||
///
|
|
||||||
/// Once this function is called, no other functions on this object are
|
|
||||||
/// valid; the loop has been removed.
|
|
||||||
virtual void disposed() = 0;
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Analyze loop L, which must be a single-basic-block loop, and if the
|
|
||||||
/// conditions can be understood well enough, produce a PipelinerLoopInfo object.
|
|
||||||
virtual std::unique_ptr<PipelinerLoopInfo>
|
|
||||||
analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Analyze the loop code, return true if it cannot be understood. Upon
|
/// Analyze the loop code, return true if it cannot be understood. Upon
|
||||||
/// success, this function returns false and returns information about the
|
/// success, this function returns false and returns information about the
|
||||||
/// induction variable and compare instruction used at the end.
|
/// induction variable and compare instruction used at the end.
|
||||||
|
@ -326,7 +326,7 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
|
|||||||
|
|
||||||
LI.LoopInductionVar = nullptr;
|
LI.LoopInductionVar = nullptr;
|
||||||
LI.LoopCompare = nullptr;
|
LI.LoopCompare = nullptr;
|
||||||
if (!TII->analyzeLoopForPipelining(L.getTopBlock())) {
|
if (TII->analyzeLoop(L, LI.LoopInductionVar, LI.LoopCompare)) {
|
||||||
LLVM_DEBUG(
|
LLVM_DEBUG(
|
||||||
dbgs() << "Unable to analyzeLoop, can NOT pipeline current Loop\n");
|
dbgs() << "Unable to analyzeLoop, can NOT pipeline current Loop\n");
|
||||||
NumFailLoop++;
|
NumFailLoop++;
|
||||||
|
@ -105,9 +105,6 @@ void ModuloScheduleExpander::expand() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void ModuloScheduleExpander::generatePipelinedLoop() {
|
void ModuloScheduleExpander::generatePipelinedLoop() {
|
||||||
LoopInfo = TII->analyzeLoopForPipelining(BB);
|
|
||||||
assert(LoopInfo && "Must be able to analyze loop!");
|
|
||||||
|
|
||||||
// Create a new basic block for the kernel and add it to the CFG.
|
// Create a new basic block for the kernel and add it to the CFG.
|
||||||
MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
|
MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
|
||||||
|
|
||||||
@ -850,6 +847,10 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
|
|||||||
MBBVectorTy &EpilogBBs,
|
MBBVectorTy &EpilogBBs,
|
||||||
ValueMapTy *VRMap) {
|
ValueMapTy *VRMap) {
|
||||||
assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
|
assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
|
||||||
|
MachineInstr *IndVar;
|
||||||
|
MachineInstr *Cmp;
|
||||||
|
if (TII->analyzeLoop(*Schedule.getLoop(), IndVar, Cmp))
|
||||||
|
llvm_unreachable("Must be able to analyze loop!");
|
||||||
MachineBasicBlock *LastPro = KernelBB;
|
MachineBasicBlock *LastPro = KernelBB;
|
||||||
MachineBasicBlock *LastEpi = KernelBB;
|
MachineBasicBlock *LastEpi = KernelBB;
|
||||||
|
|
||||||
@ -857,20 +858,32 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
|
|||||||
// to the first prolog and the last epilog blocks.
|
// to the first prolog and the last epilog blocks.
|
||||||
SmallVector<MachineInstr *, 4> PrevInsts;
|
SmallVector<MachineInstr *, 4> PrevInsts;
|
||||||
unsigned MaxIter = PrologBBs.size() - 1;
|
unsigned MaxIter = PrologBBs.size() - 1;
|
||||||
|
unsigned LC = UINT_MAX;
|
||||||
|
unsigned LCMin = UINT_MAX;
|
||||||
for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
|
for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
|
||||||
// Add branches to the prolog that go to the corresponding
|
// Add branches to the prolog that go to the corresponding
|
||||||
// epilog, and the fall-thru prolog/kernel block.
|
// epilog, and the fall-thru prolog/kernel block.
|
||||||
MachineBasicBlock *Prolog = PrologBBs[j];
|
MachineBasicBlock *Prolog = PrologBBs[j];
|
||||||
MachineBasicBlock *Epilog = EpilogBBs[i];
|
MachineBasicBlock *Epilog = EpilogBBs[i];
|
||||||
|
// We've executed one iteration, so decrement the loop count and check for
|
||||||
|
// the loop end.
|
||||||
SmallVector<MachineOperand, 4> Cond;
|
SmallVector<MachineOperand, 4> Cond;
|
||||||
Optional<bool> StaticallyGreater =
|
// Check if the LOOP0 has already been removed. If so, then there is no need
|
||||||
LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond);
|
// to reduce the trip count.
|
||||||
|
if (LC != 0)
|
||||||
|
LC = TII->reduceLoopCount(*Prolog, PreheaderBB, IndVar, *Cmp, Cond,
|
||||||
|
PrevInsts, j, MaxIter);
|
||||||
|
|
||||||
|
// Record the value of the first trip count, which is used to determine if
|
||||||
|
// branches and blocks can be removed for constant trip counts.
|
||||||
|
if (LCMin == UINT_MAX)
|
||||||
|
LCMin = LC;
|
||||||
|
|
||||||
unsigned numAdded = 0;
|
unsigned numAdded = 0;
|
||||||
if (!StaticallyGreater.hasValue()) {
|
if (Register::isVirtualRegister(LC)) {
|
||||||
Prolog->addSuccessor(Epilog);
|
Prolog->addSuccessor(Epilog);
|
||||||
numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
|
numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
|
||||||
} else if (*StaticallyGreater == false) {
|
} else if (j >= LCMin) {
|
||||||
Prolog->addSuccessor(Epilog);
|
Prolog->addSuccessor(Epilog);
|
||||||
Prolog->removeSuccessor(LastPro);
|
Prolog->removeSuccessor(LastPro);
|
||||||
LastEpi->removeSuccessor(Epilog);
|
LastEpi->removeSuccessor(Epilog);
|
||||||
@ -881,12 +894,10 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
|
|||||||
LastEpi->clear();
|
LastEpi->clear();
|
||||||
LastEpi->eraseFromParent();
|
LastEpi->eraseFromParent();
|
||||||
}
|
}
|
||||||
if (LastPro == KernelBB) {
|
|
||||||
LoopInfo->disposed();
|
|
||||||
NewKernel = nullptr;
|
|
||||||
}
|
|
||||||
LastPro->clear();
|
LastPro->clear();
|
||||||
LastPro->eraseFromParent();
|
LastPro->eraseFromParent();
|
||||||
|
if (LastPro == KernelBB)
|
||||||
|
NewKernel = nullptr;
|
||||||
} else {
|
} else {
|
||||||
numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
|
numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
|
||||||
removePhis(Epilog, Prolog);
|
removePhis(Epilog, Prolog);
|
||||||
@ -898,11 +909,6 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
|
|||||||
I != E && numAdded > 0; ++I, --numAdded)
|
I != E && numAdded > 0; ++I, --numAdded)
|
||||||
updateInstruction(&*I, false, j, 0, VRMap);
|
updateInstruction(&*I, false, j, 0, VRMap);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (NewKernel) {
|
|
||||||
LoopInfo->setPreheader(PrologBBs[MaxIter]);
|
|
||||||
LoopInfo->adjustTripCount(-(MaxIter + 1));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return true if we can compute the amount the instruction changes
|
/// Return true if we can compute the amount the instruction changes
|
||||||
|
@ -1257,5 +1257,3 @@ bool TargetInstrInfo::getInsertSubregInputs(
|
|||||||
InsertedReg.SubIdx = (unsigned)MOSubIdx.getImm();
|
InsertedReg.SubIdx = (unsigned)MOSubIdx.getImm();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
TargetInstrInfo::PipelinerLoopInfo::~PipelinerLoopInfo() {}
|
|
||||||
|
@ -674,84 +674,86 @@ unsigned HexagonInstrInfo::insertBranch(MachineBasicBlock &MBB,
|
|||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
class HexagonPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
|
/// Analyze the loop code to find the loop induction variable and compare used
|
||||||
MachineInstr *Loop, *EndLoop;
|
/// to compute the number of iterations. Currently, we analyze loops that are
|
||||||
MachineFunction *MF;
|
/// controlled using hardware loops. In this case, the induction variable
|
||||||
const HexagonInstrInfo *TII;
|
/// instruction is null. For all other cases, this function returns true, which
|
||||||
|
/// means we're unable to analyze it.
|
||||||
|
bool HexagonInstrInfo::analyzeLoop(MachineLoop &L,
|
||||||
|
MachineInstr *&IndVarInst,
|
||||||
|
MachineInstr *&CmpInst) const {
|
||||||
|
|
||||||
public:
|
MachineBasicBlock *LoopEnd = L.getBottomBlock();
|
||||||
HexagonPipelinerLoopInfo(MachineInstr *Loop, MachineInstr *EndLoop)
|
MachineBasicBlock::iterator I = LoopEnd->getFirstTerminator();
|
||||||
: Loop(Loop), EndLoop(EndLoop), MF(Loop->getParent()->getParent()),
|
|
||||||
TII(MF->getSubtarget<HexagonSubtarget>().getInstrInfo()) {}
|
|
||||||
|
|
||||||
bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
|
|
||||||
// Only ignore the terminator.
|
|
||||||
return MI == EndLoop;
|
|
||||||
}
|
|
||||||
|
|
||||||
Optional<bool>
|
|
||||||
createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB,
|
|
||||||
SmallVectorImpl<MachineOperand> &Cond) override {
|
|
||||||
if (Loop->getOpcode() == Hexagon::J2_loop0r) {
|
|
||||||
Register LoopCount = Loop->getOperand(1).getReg();
|
|
||||||
// Check if we're done with the loop.
|
|
||||||
unsigned Done = TII->createVR(MF, MVT::i1);
|
|
||||||
MachineInstr *NewCmp = BuildMI(&MBB, Loop->getDebugLoc(),
|
|
||||||
TII->get(Hexagon::C2_cmpgtui), Done)
|
|
||||||
.addReg(LoopCount)
|
|
||||||
.addImm(TC);
|
|
||||||
Cond.push_back(MachineOperand::CreateImm(Hexagon::J2_jumpf));
|
|
||||||
Cond.push_back(NewCmp->getOperand(0));
|
|
||||||
return {};
|
|
||||||
}
|
|
||||||
|
|
||||||
int64_t TripCount = Loop->getOperand(1).getImm();
|
|
||||||
return TripCount > TC;
|
|
||||||
}
|
|
||||||
|
|
||||||
void setPreheader(MachineBasicBlock *NewPreheader) override {
|
|
||||||
NewPreheader->splice(NewPreheader->getFirstTerminator(), Loop->getParent(),
|
|
||||||
Loop);
|
|
||||||
}
|
|
||||||
|
|
||||||
void adjustTripCount(int TripCountAdjust) override {
|
|
||||||
// If the loop trip count is a compile-time value, then just change the
|
|
||||||
// value.
|
|
||||||
if (Loop->getOpcode() == Hexagon::J2_loop0i ||
|
|
||||||
Loop->getOpcode() == Hexagon::J2_loop1i) {
|
|
||||||
int64_t TripCount = Loop->getOperand(1).getImm() + TripCountAdjust;
|
|
||||||
assert(TripCount > 0 && "Can't create an empty or negative loop!");
|
|
||||||
Loop->getOperand(1).setImm(TripCount);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// The loop trip count is a run-time value. We generate code to subtract
|
|
||||||
// one from the trip count, and update the loop instruction.
|
|
||||||
Register LoopCount = Loop->getOperand(1).getReg();
|
|
||||||
Register NewLoopCount = TII->createVR(MF, MVT::i32);
|
|
||||||
BuildMI(*Loop->getParent(), Loop, Loop->getDebugLoc(),
|
|
||||||
TII->get(Hexagon::A2_addi), NewLoopCount)
|
|
||||||
.addReg(LoopCount)
|
|
||||||
.addImm(TripCountAdjust);
|
|
||||||
Loop->getOperand(1).setReg(NewLoopCount);
|
|
||||||
}
|
|
||||||
|
|
||||||
void disposed() override { Loop->eraseFromParent(); }
|
|
||||||
};
|
|
||||||
|
|
||||||
std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
|
|
||||||
HexagonInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
|
|
||||||
// We really "analyze" only hardware loops right now.
|
// We really "analyze" only hardware loops right now.
|
||||||
MachineBasicBlock::iterator I = LoopBB->getFirstTerminator();
|
if (I != LoopEnd->end() && isEndLoopN(I->getOpcode())) {
|
||||||
|
IndVarInst = nullptr;
|
||||||
if (I != LoopBB->end() && isEndLoopN(I->getOpcode())) {
|
CmpInst = &*I;
|
||||||
SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
|
return false;
|
||||||
MachineInstr *LoopInst = findLoopInstr(
|
|
||||||
LoopBB, I->getOpcode(), I->getOperand(0).getMBB(), VisitedBBs);
|
|
||||||
if (LoopInst)
|
|
||||||
return std::make_unique<HexagonPipelinerLoopInfo>(LoopInst, &*I);
|
|
||||||
}
|
}
|
||||||
return nullptr;
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Generate code to reduce the loop iteration by one and check if the loop is
|
||||||
|
/// finished. Return the value/register of the new loop count. This function
|
||||||
|
/// assumes the nth iteration is peeled first.
|
||||||
|
unsigned HexagonInstrInfo::reduceLoopCount(
|
||||||
|
MachineBasicBlock &MBB, MachineBasicBlock &PreHeader, MachineInstr *IndVar,
|
||||||
|
MachineInstr &Cmp, SmallVectorImpl<MachineOperand> &Cond,
|
||||||
|
SmallVectorImpl<MachineInstr *> &PrevInsts, unsigned Iter,
|
||||||
|
unsigned MaxIter) const {
|
||||||
|
// We expect a hardware loop currently. This means that IndVar is set
|
||||||
|
// to null, and the compare is the ENDLOOP instruction.
|
||||||
|
assert((!IndVar) && isEndLoopN(Cmp.getOpcode())
|
||||||
|
&& "Expecting a hardware loop");
|
||||||
|
MachineFunction *MF = MBB.getParent();
|
||||||
|
DebugLoc DL = Cmp.getDebugLoc();
|
||||||
|
SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
|
||||||
|
MachineInstr *Loop = findLoopInstr(&MBB, Cmp.getOpcode(),
|
||||||
|
Cmp.getOperand(0).getMBB(), VisitedBBs);
|
||||||
|
if (!Loop)
|
||||||
|
return 0;
|
||||||
|
// If the loop trip count is a compile-time value, then just change the
|
||||||
|
// value.
|
||||||
|
if (Loop->getOpcode() == Hexagon::J2_loop0i ||
|
||||||
|
Loop->getOpcode() == Hexagon::J2_loop1i) {
|
||||||
|
int64_t Offset = Loop->getOperand(1).getImm();
|
||||||
|
if (Offset <= 1)
|
||||||
|
Loop->eraseFromParent();
|
||||||
|
else
|
||||||
|
Loop->getOperand(1).setImm(Offset - 1);
|
||||||
|
return Offset - 1;
|
||||||
|
}
|
||||||
|
// The loop trip count is a run-time value. We generate code to subtract
|
||||||
|
// one from the trip count, and update the loop instruction.
|
||||||
|
assert(Loop->getOpcode() == Hexagon::J2_loop0r && "Unexpected instruction");
|
||||||
|
Register LoopCount = Loop->getOperand(1).getReg();
|
||||||
|
// Check if we're done with the loop.
|
||||||
|
unsigned LoopEnd = createVR(MF, MVT::i1);
|
||||||
|
MachineInstr *NewCmp = BuildMI(&MBB, DL, get(Hexagon::C2_cmpgtui), LoopEnd).
|
||||||
|
addReg(LoopCount).addImm(1);
|
||||||
|
unsigned NewLoopCount = createVR(MF, MVT::i32);
|
||||||
|
MachineInstr *NewAdd = BuildMI(&MBB, DL, get(Hexagon::A2_addi), NewLoopCount).
|
||||||
|
addReg(LoopCount).addImm(-1);
|
||||||
|
const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
|
||||||
|
// Update the previously generated instructions with the new loop counter.
|
||||||
|
for (SmallVectorImpl<MachineInstr *>::iterator I = PrevInsts.begin(),
|
||||||
|
E = PrevInsts.end(); I != E; ++I)
|
||||||
|
(*I)->substituteRegister(LoopCount, NewLoopCount, 0, HRI);
|
||||||
|
PrevInsts.clear();
|
||||||
|
PrevInsts.push_back(NewCmp);
|
||||||
|
PrevInsts.push_back(NewAdd);
|
||||||
|
// Insert the new loop instruction if this is the last time the loop is
|
||||||
|
// decremented.
|
||||||
|
if (Iter == MaxIter)
|
||||||
|
BuildMI(&MBB, DL, get(Hexagon::J2_loop0r)).
|
||||||
|
addMBB(Loop->getOperand(0).getMBB()).addReg(NewLoopCount);
|
||||||
|
// Delete the old loop instruction.
|
||||||
|
if (Iter == 0)
|
||||||
|
Loop->eraseFromParent();
|
||||||
|
Cond.push_back(MachineOperand::CreateImm(Hexagon::J2_jumpf));
|
||||||
|
Cond.push_back(NewCmp->getOperand(0));
|
||||||
|
return NewLoopCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
|
bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
|
||||||
|
@ -129,10 +129,21 @@ public:
|
|||||||
const DebugLoc &DL,
|
const DebugLoc &DL,
|
||||||
int *BytesAdded = nullptr) const override;
|
int *BytesAdded = nullptr) const override;
|
||||||
|
|
||||||
/// Analyze loop L, which must be a single-basic-block loop, and if the
|
/// Analyze the loop code, return true if it cannot be understood. Upon
|
||||||
/// conditions can be understood well enough, produce a PipelinerLoopInfo object.
|
/// success, this function returns false and returns information about the
|
||||||
std::unique_ptr<PipelinerLoopInfo>
|
/// induction variable and compare instruction used at the end.
|
||||||
analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override;
|
bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst,
|
||||||
|
MachineInstr *&CmpInst) const override;
|
||||||
|
|
||||||
|
/// Generate code to reduce the loop iteration by one and check if the loop
|
||||||
|
/// is finished. Return the value/register of the new loop count. We need
|
||||||
|
/// this function when peeling off one or more iterations of a loop. This
|
||||||
|
/// function assumes the nth iteration is peeled first.
|
||||||
|
unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineBasicBlock &PreHeader,
|
||||||
|
MachineInstr *IndVar, MachineInstr &Cmp,
|
||||||
|
SmallVectorImpl<MachineOperand> &Cond,
|
||||||
|
SmallVectorImpl<MachineInstr *> &PrevInsts,
|
||||||
|
unsigned Iter, unsigned MaxIter) const override;
|
||||||
|
|
||||||
/// Return true if it's profitable to predicate
|
/// Return true if it's profitable to predicate
|
||||||
/// instructions with accumulated instruction latency of "NumCycles"
|
/// instructions with accumulated instruction latency of "NumCycles"
|
||||||
|
@ -3930,92 +3930,21 @@ bool PPCInstrInfo::isBDNZ(unsigned Opcode) const {
|
|||||||
return (Opcode == (Subtarget.isPPC64() ? PPC::BDNZ8 : PPC::BDNZ));
|
return (Opcode == (Subtarget.isPPC64() ? PPC::BDNZ8 : PPC::BDNZ));
|
||||||
}
|
}
|
||||||
|
|
||||||
class PPCPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
|
bool PPCInstrInfo::analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst,
|
||||||
MachineInstr *Loop, *EndLoop, *LoopCount;
|
MachineInstr *&CmpInst) const {
|
||||||
MachineFunction *MF;
|
MachineBasicBlock *LoopEnd = L.getBottomBlock();
|
||||||
const TargetInstrInfo *TII;
|
MachineBasicBlock::iterator I = LoopEnd->getFirstTerminator();
|
||||||
|
// We really "analyze" only CTR loops right now.
|
||||||
public:
|
if (I != LoopEnd->end() && isBDNZ(I->getOpcode())) {
|
||||||
PPCPipelinerLoopInfo(MachineInstr *Loop, MachineInstr *EndLoop,
|
IndVarInst = nullptr;
|
||||||
MachineInstr *LoopCount)
|
CmpInst = &*I;
|
||||||
: Loop(Loop), EndLoop(EndLoop), LoopCount(LoopCount),
|
return false;
|
||||||
MF(Loop->getParent()->getParent()),
|
|
||||||
TII(MF->getSubtarget().getInstrInfo()) {}
|
|
||||||
|
|
||||||
bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
|
|
||||||
// Only ignore the terminator.
|
|
||||||
return MI == EndLoop;
|
|
||||||
}
|
}
|
||||||
|
return true;
|
||||||
Optional<bool>
|
|
||||||
createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB,
|
|
||||||
SmallVectorImpl<MachineOperand> &Cond) override {
|
|
||||||
bool IsConstantTripCount =
|
|
||||||
LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI;
|
|
||||||
if (!IsConstantTripCount) {
|
|
||||||
// Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1,
|
|
||||||
// so we don't need to generate anything here.
|
|
||||||
Cond.push_back(MachineOperand::CreateImm(0));
|
|
||||||
Cond.push_back(MachineOperand::CreateReg(
|
|
||||||
MF->getSubtarget<PPCSubtarget>().isPPC64() ? PPC::CTR8 : PPC::CTR,
|
|
||||||
true));
|
|
||||||
return {};
|
|
||||||
}
|
|
||||||
|
|
||||||
int64_t TripCount = LoopCount->getOperand(1).getImm();
|
|
||||||
return TripCount > TC;
|
|
||||||
}
|
|
||||||
|
|
||||||
void setPreheader(MachineBasicBlock *NewPreheader) override {
|
|
||||||
// Do nothing. We want the LOOP setup instruction to stay in the *old*
|
|
||||||
// preheader, so we can use BDZ in the prologs to adapt the loop trip count.
|
|
||||||
}
|
|
||||||
|
|
||||||
void adjustTripCount(int TripCountAdjust) override {
|
|
||||||
// If the loop trip count is a compile-time value, then just change the
|
|
||||||
// value.
|
|
||||||
if (LoopCount->getOpcode() == PPC::LI8 ||
|
|
||||||
LoopCount->getOpcode() == PPC::LI) {
|
|
||||||
int64_t TripCount = LoopCount->getOperand(1).getImm() + TripCountAdjust;
|
|
||||||
LoopCount->getOperand(1).setImm(TripCount);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1,
|
|
||||||
// so we don't need to generate anything here.
|
|
||||||
}
|
|
||||||
|
|
||||||
void disposed() override {
|
|
||||||
Loop->eraseFromParent();
|
|
||||||
// Ensure the loop setup instruction is deleted too.
|
|
||||||
LoopCount->eraseFromParent();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
|
|
||||||
PPCInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
|
|
||||||
// We really "analyze" only hardware loops right now.
|
|
||||||
MachineBasicBlock::iterator I = LoopBB->getFirstTerminator();
|
|
||||||
MachineBasicBlock *Preheader = *LoopBB->pred_begin();
|
|
||||||
if (Preheader == LoopBB)
|
|
||||||
Preheader = *std::next(LoopBB->pred_begin());
|
|
||||||
MachineFunction *MF = Preheader->getParent();
|
|
||||||
|
|
||||||
if (I != LoopBB->end() && isBDNZ(I->getOpcode())) {
|
|
||||||
SmallPtrSet<MachineBasicBlock *, 8> Visited;
|
|
||||||
if (MachineInstr *LoopInst = findLoopInstr(*Preheader, Visited)) {
|
|
||||||
Register LoopCountReg = LoopInst->getOperand(0).getReg();
|
|
||||||
MachineRegisterInfo &MRI = MF->getRegInfo();
|
|
||||||
MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg);
|
|
||||||
return std::make_unique<PPCPipelinerLoopInfo>(LoopInst, &*I, LoopCount);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nullptr;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
MachineInstr *PPCInstrInfo::findLoopInstr(
|
MachineInstr *
|
||||||
MachineBasicBlock &PreHeader,
|
PPCInstrInfo::findLoopInstr(MachineBasicBlock &PreHeader) const {
|
||||||
SmallPtrSet<MachineBasicBlock *, 8> &Visited) const {
|
|
||||||
|
|
||||||
unsigned LOOPi = (Subtarget.isPPC64() ? PPC::MTCTR8loop : PPC::MTCTRloop);
|
unsigned LOOPi = (Subtarget.isPPC64() ? PPC::MTCTR8loop : PPC::MTCTRloop);
|
||||||
|
|
||||||
@ -4026,6 +3955,50 @@ MachineInstr *PPCInstrInfo::findLoopInstr(
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsigned PPCInstrInfo::reduceLoopCount(
|
||||||
|
MachineBasicBlock &MBB, MachineBasicBlock &PreHeader, MachineInstr *IndVar,
|
||||||
|
MachineInstr &Cmp, SmallVectorImpl<MachineOperand> &Cond,
|
||||||
|
SmallVectorImpl<MachineInstr *> &PrevInsts, unsigned Iter,
|
||||||
|
unsigned MaxIter) const {
|
||||||
|
// We expect a hardware loop currently. This means that IndVar is set
|
||||||
|
// to null, and the compare is the ENDLOOP instruction.
|
||||||
|
assert((!IndVar) && isBDNZ(Cmp.getOpcode()) && "Expecting a CTR loop");
|
||||||
|
MachineFunction *MF = MBB.getParent();
|
||||||
|
DebugLoc DL = Cmp.getDebugLoc();
|
||||||
|
MachineInstr *Loop = findLoopInstr(PreHeader);
|
||||||
|
if (!Loop)
|
||||||
|
return 0;
|
||||||
|
Register LoopCountReg = Loop->getOperand(0).getReg();
|
||||||
|
MachineRegisterInfo &MRI = MF->getRegInfo();
|
||||||
|
MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg);
|
||||||
|
|
||||||
|
if (!LoopCount)
|
||||||
|
return 0;
|
||||||
|
// If the loop trip count is a compile-time value, then just change the
|
||||||
|
// value.
|
||||||
|
if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI) {
|
||||||
|
int64_t Offset = LoopCount->getOperand(1).getImm();
|
||||||
|
if (Offset <= 1) {
|
||||||
|
LoopCount->eraseFromParent();
|
||||||
|
Loop->eraseFromParent();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
LoopCount->getOperand(1).setImm(Offset - 1);
|
||||||
|
return Offset - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The loop trip count is a run-time value.
|
||||||
|
// We need to subtract one from the trip count,
|
||||||
|
// and insert branch later to check if we're done with the loop.
|
||||||
|
|
||||||
|
// Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1,
|
||||||
|
// so we don't need to generate anything here.
|
||||||
|
Cond.push_back(MachineOperand::CreateImm(0));
|
||||||
|
Cond.push_back(MachineOperand::CreateReg(
|
||||||
|
Subtarget.isPPC64() ? PPC::CTR8 : PPC::CTR, true));
|
||||||
|
return LoopCountReg;
|
||||||
|
}
|
||||||
|
|
||||||
// Return true if get the base operand, byte offset of an instruction and the
|
// Return true if get the base operand, byte offset of an instruction and the
|
||||||
// memory width. Width is the size of memory that is being loaded/stored.
|
// memory width. Width is the size of memory that is being loaded/stored.
|
||||||
bool PPCInstrInfo::getMemOperandWithOffsetWidth(
|
bool PPCInstrInfo::getMemOperandWithOffsetWidth(
|
||||||
|
@ -486,14 +486,26 @@ public:
|
|||||||
/// On PPC, we have two instructions used to set-up the hardware loop
|
/// On PPC, we have two instructions used to set-up the hardware loop
|
||||||
/// (MTCTRloop, MTCTR8loop) with corresponding endloop (BDNZ, BDNZ8)
|
/// (MTCTRloop, MTCTR8loop) with corresponding endloop (BDNZ, BDNZ8)
|
||||||
/// instructions to indicate the end of a loop.
|
/// instructions to indicate the end of a loop.
|
||||||
MachineInstr *
|
MachineInstr *findLoopInstr(MachineBasicBlock &PreHeader) const;
|
||||||
findLoopInstr(MachineBasicBlock &PreHeader,
|
|
||||||
SmallPtrSet<MachineBasicBlock *, 8> &Visited) const;
|
|
||||||
|
|
||||||
/// Analyze loop L, which must be a single-basic-block loop, and if the
|
/// Analyze the loop code to find the loop induction variable and compare used
|
||||||
/// conditions can be understood well enough, produce a PipelinerLoopInfo object.
|
/// to compute the number of iterations. Currently, we analyze loops that are
|
||||||
std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
|
/// controlled using hardware loops. In this case, the induction variable
|
||||||
analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override;
|
/// instruction is null. For all other cases, this function returns true,
|
||||||
|
/// which means we're unable to analyze it. \p IndVarInst and \p CmpInst will
|
||||||
|
/// return new values when we can analyze the readonly loop \p L, otherwise,
|
||||||
|
/// nothing got changed
|
||||||
|
bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst,
|
||||||
|
MachineInstr *&CmpInst) const override;
|
||||||
|
/// Generate code to reduce the loop iteration by one and check if the loop
|
||||||
|
/// is finished. Return the value/register of the new loop count. We need
|
||||||
|
/// this function when peeling off one or more iterations of a loop. This
|
||||||
|
/// function assumes the last iteration is peeled first.
|
||||||
|
unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineBasicBlock &PreHeader,
|
||||||
|
MachineInstr *IndVar, MachineInstr &Cmp,
|
||||||
|
SmallVectorImpl<MachineOperand> &Cond,
|
||||||
|
SmallVectorImpl<MachineInstr *> &PrevInsts,
|
||||||
|
unsigned Iter, unsigned MaxIter) const override;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -7,8 +7,8 @@
|
|||||||
|
|
||||||
; CHECK: if ({{.*}}) jump
|
; CHECK: if ({{.*}}) jump
|
||||||
; CHECK: [[VREG:v([0-9]+)]]{{.*}} = {{.*}}vmem(r{{[0-9]+}}++#1)
|
; CHECK: [[VREG:v([0-9]+)]]{{.*}} = {{.*}}vmem(r{{[0-9]+}}++#1)
|
||||||
; CHECK: if ({{.*}}) {{jump|jump:nt|jump:t}} [[EPLOG1:(.*)]]
|
; CHECK: if ({{.*}}) {{jump|jump:nt}} [[EPLOG1:(.*)]]
|
||||||
; CHECK: if ({{.*}}) {{jump|jump:nt|jump:t}} [[EPLOG:(.*)]]
|
; CHECK: if ({{.*}}) {{jump|jump:nt}} [[EPLOG:(.*)]]
|
||||||
; CHECK: [[EPLOG]]:
|
; CHECK: [[EPLOG]]:
|
||||||
; CHECK: [[VREG1:v([0-9]+)]] = [[VREG]]
|
; CHECK: [[VREG1:v([0-9]+)]] = [[VREG]]
|
||||||
; CHECK: [[VREG]] = v{{[0-9]+}}
|
; CHECK: [[VREG]] = v{{[0-9]+}}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user