mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
Revert "[MachinePipeliner] Improve the TargetInstrInfo API analyzeLoop/reduceLoopCount"
This commit broke the ASan buildbot. See comments in rL372376 for more information. This reverts commit 15e27b0b6d9d51362fad85dbe95ac5b3fadf0a06. llvm-svn: 372425
This commit is contained in:
parent
e4d9769ce4
commit
1a7a7c7655
@ -62,7 +62,6 @@
|
||||
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineLoopInfo.h"
|
||||
#include "llvm/CodeGen/TargetInstrInfo.h"
|
||||
#include "llvm/CodeGen/TargetSubtargetInfo.h"
|
||||
#include <vector>
|
||||
|
||||
@ -169,7 +168,6 @@ private:
|
||||
MachineBasicBlock *BB;
|
||||
MachineBasicBlock *Preheader;
|
||||
MachineBasicBlock *NewKernel = nullptr;
|
||||
std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopInfo;
|
||||
|
||||
/// Map for each register and the max difference between its uses and def.
|
||||
/// The first element in the pair is the max difference in stages. The
|
||||
|
@ -662,50 +662,6 @@ public:
|
||||
BytesAdded);
|
||||
}
|
||||
|
||||
/// Object returned by analyzeLoopForPipelining. Allows software pipelining
|
||||
/// implementations to query attributes of the loop being pipelined and to
|
||||
/// apply target-specific updates to the loop once pipelining is complete.
|
||||
///
/// Every member except the destructor is pure virtual, so a concrete subclass
/// has to implement all of them.
class PipelinerLoopInfo {
|
||||
public:
|
||||
/// Virtual so that a subclass object can be destroyed through a
/// PipelinerLoopInfo pointer.
virtual ~PipelinerLoopInfo();
|
||||
/// Return true if the given instruction should not be pipelined and should
|
||||
/// be ignored. An example could be a loop comparison, or induction variable
|
||||
/// update with no users being pipelined.
|
||||
virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const = 0;
|
||||
|
||||
/// Create a condition to determine if the trip count of the loop is greater
|
||||
/// than TC.
|
||||
///
|
||||
/// If the trip count is statically known to be greater than TC, return
|
||||
/// true. If the trip count is statically known to be not greater than TC,
|
||||
/// return false. Otherwise return nullopt and fill out Cond with the test
|
||||
/// condition.
|
||||
///
/// \param TC   Trip-count threshold to compare against.
/// \param MBB  NOTE(review): presumably the block into which any
///             instructions needed for the test are emitted; this interface
///             does not say so -- confirm against the target implementations.
/// \param Cond Receives the test condition when nullopt is returned.
virtual Optional<bool>
|
||||
createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB,
|
||||
SmallVectorImpl<MachineOperand> &Cond) = 0;
|
||||
|
||||
/// Modify the loop such that the trip count is
|
||||
/// OriginalTC + TripCountAdjust.
|
||||
///
/// \param TripCountAdjust Signed amount added to the original trip count.
virtual void adjustTripCount(int TripCountAdjust) = 0;
|
||||
|
||||
/// Called when the loop's preheader has been modified to NewPreheader.
|
||||
virtual void setPreheader(MachineBasicBlock *NewPreheader) = 0;
|
||||
|
||||
/// Called when the loop is being removed. Any instructions in the preheader
|
||||
/// should be removed.
|
||||
///
|
||||
/// Once this function is called, no other functions on this object are
|
||||
/// valid; the loop has been removed.
|
||||
virtual void disposed() = 0;
|
||||
};
|
||||
|
||||
/// Analyze the loop formed by \p LoopBB, which must be a single-basic-block
|
||||
/// loop, and if the conditions can be understood enough produce a
/// PipelinerLoopInfo object.
|
||||
///
/// This default implementation performs no analysis and always returns
/// nullptr.
virtual std::unique_ptr<PipelinerLoopInfo>
|
||||
analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// Analyze the loop code, return true if it cannot be understood. Upon
|
||||
/// success, this function returns false and returns information about the
|
||||
/// induction variable and compare instruction used at the end.
|
||||
|
@ -326,7 +326,7 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
|
||||
|
||||
LI.LoopInductionVar = nullptr;
|
||||
LI.LoopCompare = nullptr;
|
||||
if (!TII->analyzeLoopForPipelining(L.getTopBlock())) {
|
||||
if (TII->analyzeLoop(L, LI.LoopInductionVar, LI.LoopCompare)) {
|
||||
LLVM_DEBUG(
|
||||
dbgs() << "Unable to analyzeLoop, can NOT pipeline current Loop\n");
|
||||
NumFailLoop++;
|
||||
|
@ -105,9 +105,6 @@ void ModuloScheduleExpander::expand() {
|
||||
}
|
||||
|
||||
void ModuloScheduleExpander::generatePipelinedLoop() {
|
||||
LoopInfo = TII->analyzeLoopForPipelining(BB);
|
||||
assert(LoopInfo && "Must be able to analyze loop!");
|
||||
|
||||
// Create a new basic block for the kernel and add it to the CFG.
|
||||
MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
|
||||
|
||||
@ -850,6 +847,10 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
|
||||
MBBVectorTy &EpilogBBs,
|
||||
ValueMapTy *VRMap) {
|
||||
assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
|
||||
MachineInstr *IndVar;
|
||||
MachineInstr *Cmp;
|
||||
if (TII->analyzeLoop(*Schedule.getLoop(), IndVar, Cmp))
|
||||
llvm_unreachable("Must be able to analyze loop!");
|
||||
MachineBasicBlock *LastPro = KernelBB;
|
||||
MachineBasicBlock *LastEpi = KernelBB;
|
||||
|
||||
@ -857,20 +858,32 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
|
||||
// to the first prolog and the last epilog blocks.
|
||||
SmallVector<MachineInstr *, 4> PrevInsts;
|
||||
unsigned MaxIter = PrologBBs.size() - 1;
|
||||
unsigned LC = UINT_MAX;
|
||||
unsigned LCMin = UINT_MAX;
|
||||
for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
|
||||
// Add branches to the prolog that go to the corresponding
|
||||
// epilog, and the fall-thru prolog/kernel block.
|
||||
MachineBasicBlock *Prolog = PrologBBs[j];
|
||||
MachineBasicBlock *Epilog = EpilogBBs[i];
|
||||
|
||||
// We've executed one iteration, so decrement the loop count and check for
|
||||
// the loop end.
|
||||
SmallVector<MachineOperand, 4> Cond;
|
||||
Optional<bool> StaticallyGreater =
|
||||
LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond);
|
||||
// Check if the LOOP0 has already been removed. If so, then there is no need
|
||||
// to reduce the trip count.
|
||||
if (LC != 0)
|
||||
LC = TII->reduceLoopCount(*Prolog, PreheaderBB, IndVar, *Cmp, Cond,
|
||||
PrevInsts, j, MaxIter);
|
||||
|
||||
// Record the value of the first trip count, which is used to determine if
|
||||
// branches and blocks can be removed for constant trip counts.
|
||||
if (LCMin == UINT_MAX)
|
||||
LCMin = LC;
|
||||
|
||||
unsigned numAdded = 0;
|
||||
if (!StaticallyGreater.hasValue()) {
|
||||
if (Register::isVirtualRegister(LC)) {
|
||||
Prolog->addSuccessor(Epilog);
|
||||
numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
|
||||
} else if (*StaticallyGreater == false) {
|
||||
} else if (j >= LCMin) {
|
||||
Prolog->addSuccessor(Epilog);
|
||||
Prolog->removeSuccessor(LastPro);
|
||||
LastEpi->removeSuccessor(Epilog);
|
||||
@ -881,12 +894,10 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
|
||||
LastEpi->clear();
|
||||
LastEpi->eraseFromParent();
|
||||
}
|
||||
if (LastPro == KernelBB) {
|
||||
LoopInfo->disposed();
|
||||
NewKernel = nullptr;
|
||||
}
|
||||
LastPro->clear();
|
||||
LastPro->eraseFromParent();
|
||||
if (LastPro == KernelBB)
|
||||
NewKernel = nullptr;
|
||||
} else {
|
||||
numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
|
||||
removePhis(Epilog, Prolog);
|
||||
@ -898,11 +909,6 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
|
||||
I != E && numAdded > 0; ++I, --numAdded)
|
||||
updateInstruction(&*I, false, j, 0, VRMap);
|
||||
}
|
||||
|
||||
if (NewKernel) {
|
||||
LoopInfo->setPreheader(PrologBBs[MaxIter]);
|
||||
LoopInfo->adjustTripCount(-(MaxIter + 1));
|
||||
}
|
||||
}
|
||||
|
||||
/// Return true if we can compute the amount the instruction changes
|
||||
|
@ -1257,5 +1257,3 @@ bool TargetInstrInfo::getInsertSubregInputs(
|
||||
InsertedReg.SubIdx = (unsigned)MOSubIdx.getImm();
|
||||
return true;
|
||||
}
|
||||
|
||||
// Out-of-line definition of the PipelinerLoopInfo destructor; the body is
// intentionally empty.
TargetInstrInfo::PipelinerLoopInfo::~PipelinerLoopInfo() {}
|
||||
|
@ -674,84 +674,86 @@ unsigned HexagonInstrInfo::insertBranch(MachineBasicBlock &MBB,
|
||||
return 2;
|
||||
}
|
||||
|
||||
class HexagonPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
|
||||
MachineInstr *Loop, *EndLoop;
|
||||
MachineFunction *MF;
|
||||
const HexagonInstrInfo *TII;
|
||||
/// Analyze the loop code to find the loop induction variable and compare used
|
||||
/// to compute the number of iterations. Currently, we analyze loops that are
|
||||
/// controlled using hardware loops. In this case, the induction variable
|
||||
/// instruction is null. For all other cases, this function returns true, which
|
||||
/// means we're unable to analyze it.
|
||||
bool HexagonInstrInfo::analyzeLoop(MachineLoop &L,
|
||||
MachineInstr *&IndVarInst,
|
||||
MachineInstr *&CmpInst) const {
|
||||
|
||||
public:
|
||||
HexagonPipelinerLoopInfo(MachineInstr *Loop, MachineInstr *EndLoop)
|
||||
: Loop(Loop), EndLoop(EndLoop), MF(Loop->getParent()->getParent()),
|
||||
TII(MF->getSubtarget<HexagonSubtarget>().getInstrInfo()) {}
|
||||
|
||||
bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
|
||||
// Only ignore the terminator.
|
||||
return MI == EndLoop;
|
||||
}
|
||||
|
||||
Optional<bool>
|
||||
createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB,
|
||||
SmallVectorImpl<MachineOperand> &Cond) override {
|
||||
if (Loop->getOpcode() == Hexagon::J2_loop0r) {
|
||||
Register LoopCount = Loop->getOperand(1).getReg();
|
||||
// Check if we're done with the loop.
|
||||
unsigned Done = TII->createVR(MF, MVT::i1);
|
||||
MachineInstr *NewCmp = BuildMI(&MBB, Loop->getDebugLoc(),
|
||||
TII->get(Hexagon::C2_cmpgtui), Done)
|
||||
.addReg(LoopCount)
|
||||
.addImm(TC);
|
||||
Cond.push_back(MachineOperand::CreateImm(Hexagon::J2_jumpf));
|
||||
Cond.push_back(NewCmp->getOperand(0));
|
||||
return {};
|
||||
}
|
||||
|
||||
int64_t TripCount = Loop->getOperand(1).getImm();
|
||||
return TripCount > TC;
|
||||
}
|
||||
|
||||
void setPreheader(MachineBasicBlock *NewPreheader) override {
|
||||
NewPreheader->splice(NewPreheader->getFirstTerminator(), Loop->getParent(),
|
||||
Loop);
|
||||
}
|
||||
|
||||
void adjustTripCount(int TripCountAdjust) override {
|
||||
// If the loop trip count is a compile-time value, then just change the
|
||||
// value.
|
||||
if (Loop->getOpcode() == Hexagon::J2_loop0i ||
|
||||
Loop->getOpcode() == Hexagon::J2_loop1i) {
|
||||
int64_t TripCount = Loop->getOperand(1).getImm() + TripCountAdjust;
|
||||
assert(TripCount > 0 && "Can't create an empty or negative loop!");
|
||||
Loop->getOperand(1).setImm(TripCount);
|
||||
return;
|
||||
}
|
||||
|
||||
// The loop trip count is a run-time value. We generate code to subtract
|
||||
// one from the trip count, and update the loop instruction.
|
||||
Register LoopCount = Loop->getOperand(1).getReg();
|
||||
Register NewLoopCount = TII->createVR(MF, MVT::i32);
|
||||
BuildMI(*Loop->getParent(), Loop, Loop->getDebugLoc(),
|
||||
TII->get(Hexagon::A2_addi), NewLoopCount)
|
||||
.addReg(LoopCount)
|
||||
.addImm(TripCountAdjust);
|
||||
Loop->getOperand(1).setReg(NewLoopCount);
|
||||
}
|
||||
|
||||
void disposed() override { Loop->eraseFromParent(); }
|
||||
};
|
||||
|
||||
std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
|
||||
HexagonInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
|
||||
MachineBasicBlock *LoopEnd = L.getBottomBlock();
|
||||
MachineBasicBlock::iterator I = LoopEnd->getFirstTerminator();
|
||||
// We really "analyze" only hardware loops right now.
|
||||
MachineBasicBlock::iterator I = LoopBB->getFirstTerminator();
|
||||
|
||||
if (I != LoopBB->end() && isEndLoopN(I->getOpcode())) {
|
||||
SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
|
||||
MachineInstr *LoopInst = findLoopInstr(
|
||||
LoopBB, I->getOpcode(), I->getOperand(0).getMBB(), VisitedBBs);
|
||||
if (LoopInst)
|
||||
return std::make_unique<HexagonPipelinerLoopInfo>(LoopInst, &*I);
|
||||
if (I != LoopEnd->end() && isEndLoopN(I->getOpcode())) {
|
||||
IndVarInst = nullptr;
|
||||
CmpInst = &*I;
|
||||
return false;
|
||||
}
|
||||
return nullptr;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Generate code to reduce the loop iteration by one and check if the loop is
|
||||
/// finished. Return the value/register of the new loop count. This function
|
||||
/// assumes the nth iteration is peeled first.
|
||||
///
/// \param MBB       Block from which findLoopInstr starts looking for the loop
///                  setup instruction, and to which the new compare/add (and
///                  possibly the new loop instruction) are added.
/// \param PreHeader Not referenced by this implementation.
/// \param IndVar    Must be null (asserted).
/// \param Cmp       The loop-ending instruction; its opcode must satisfy
///                  isEndLoopN (asserted).
/// \param Cond      In the register trip-count case, receives the J2_jumpf
///                  opcode as an immediate followed by the predicate register
///                  defined by the new compare. Not modified on other paths.
/// \param PrevInsts Instructions recorded by the previous call; they are
///                  rewritten to read the new loop-count register and the
///                  list is then replaced by this call's compare and add.
/// \param Iter      The old loop instruction is erased when this is 0.
/// \param MaxIter   A new J2_loop0r is emitted when Iter equals this value.
/// \return 0 if no loop instruction is found; Offset - 1 when the trip count
///         is the immediate Offset; otherwise the register created for the
///         decremented loop count.
unsigned HexagonInstrInfo::reduceLoopCount(
|
||||
MachineBasicBlock &MBB, MachineBasicBlock &PreHeader, MachineInstr *IndVar,
|
||||
MachineInstr &Cmp, SmallVectorImpl<MachineOperand> &Cond,
|
||||
SmallVectorImpl<MachineInstr *> &PrevInsts, unsigned Iter,
|
||||
unsigned MaxIter) const {
|
||||
// We expect a hardware loop currently. This means that IndVar is set
|
||||
// to null, and the compare is the ENDLOOP instruction.
|
||||
assert((!IndVar) && isEndLoopN(Cmp.getOpcode())
|
||||
&& "Expecting a hardware loop");
|
||||
MachineFunction *MF = MBB.getParent();
|
||||
DebugLoc DL = Cmp.getDebugLoc();
|
||||
SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
|
||||
// Look up the loop setup instruction that matches this end-loop opcode and
// branch target.
MachineInstr *Loop = findLoopInstr(&MBB, Cmp.getOpcode(),
|
||||
Cmp.getOperand(0).getMBB(), VisitedBBs);
|
||||
// No loop setup instruction was found: report 0 and leave Cond untouched.
if (!Loop)
|
||||
return 0;
|
||||
// If the loop trip count is a compile-time value, then just change the
|
||||
// value.
|
||||
if (Loop->getOpcode() == Hexagon::J2_loop0i ||
|
||||
Loop->getOpcode() == Hexagon::J2_loop1i) {
|
||||
int64_t Offset = Loop->getOperand(1).getImm();
|
||||
// A count of one or less leaves nothing to iterate, so the loop setup
// instruction is erased instead of being given a smaller immediate.
if (Offset <= 1)
|
||||
Loop->eraseFromParent();
|
||||
else
|
||||
Loop->getOperand(1).setImm(Offset - 1);
|
||||
// Returned in both cases; the int64_t value is converted to the unsigned
// return type.
return Offset - 1;
|
||||
}
|
||||
// The loop trip count is a run-time value. We generate code to subtract
|
||||
// one from the trip count, and update the loop instruction.
|
||||
assert(Loop->getOpcode() == Hexagon::J2_loop0r && "Unexpected instruction");
|
||||
Register LoopCount = Loop->getOperand(1).getReg();
|
||||
// Check if we're done with the loop.
|
||||
unsigned LoopEnd = createVR(MF, MVT::i1);
|
||||
// LoopEnd = C2_cmpgtui(LoopCount, 1)
MachineInstr *NewCmp = BuildMI(&MBB, DL, get(Hexagon::C2_cmpgtui), LoopEnd).
|
||||
addReg(LoopCount).addImm(1);
|
||||
unsigned NewLoopCount = createVR(MF, MVT::i32);
|
||||
// NewLoopCount = A2_addi(LoopCount, -1)
MachineInstr *NewAdd = BuildMI(&MBB, DL, get(Hexagon::A2_addi), NewLoopCount).
|
||||
addReg(LoopCount).addImm(-1);
|
||||
const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
|
||||
// Update the previously generated instructions with the new loop counter.
|
||||
for (SmallVectorImpl<MachineInstr *>::iterator I = PrevInsts.begin(),
|
||||
E = PrevInsts.end(); I != E; ++I)
|
||||
(*I)->substituteRegister(LoopCount, NewLoopCount, 0, HRI);
|
||||
// Remember this call's compare and add so that the next call can rewrite
// them in the same way.
PrevInsts.clear();
|
||||
PrevInsts.push_back(NewCmp);
|
||||
PrevInsts.push_back(NewAdd);
|
||||
// Insert the new loop instruction if this is the last time the loop is
|
||||
// decremented.
|
||||
if (Iter == MaxIter)
|
||||
BuildMI(&MBB, DL, get(Hexagon::J2_loop0r)).
|
||||
addMBB(Loop->getOperand(0).getMBB()).addReg(NewLoopCount);
|
||||
// Delete the old loop instruction.
|
||||
if (Iter == 0)
|
||||
Loop->eraseFromParent();
|
||||
// Branch condition handed back to the caller: the J2_jumpf opcode followed
// by the predicate register defined by NewCmp.
Cond.push_back(MachineOperand::CreateImm(Hexagon::J2_jumpf));
|
||||
Cond.push_back(NewCmp->getOperand(0));
|
||||
return NewLoopCount;
|
||||
}
|
||||
|
||||
bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
|
||||
|
@ -129,10 +129,21 @@ public:
|
||||
const DebugLoc &DL,
|
||||
int *BytesAdded = nullptr) const override;
|
||||
|
||||
/// Analyze loop L, which must be a single-basic-block loop, and if the
|
||||
/// conditions can be understood enough produce a PipelinerLoopInfo object.
|
||||
std::unique_ptr<PipelinerLoopInfo>
|
||||
analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override;
|
||||
/// Analyze the loop code, return true if it cannot be understood. Upon
|
||||
/// success, this function returns false and returns information about the
|
||||
/// induction variable and compare instruction used at the end.
|
||||
bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst,
|
||||
MachineInstr *&CmpInst) const override;
|
||||
|
||||
/// Generate code to reduce the loop iteration by one and check if the loop
|
||||
/// is finished. Return the value/register of the new loop count. We need
|
||||
/// this function when peeling off one or more iterations of a loop. This
|
||||
/// function assumes the nth iteration is peeled first.
|
||||
unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineBasicBlock &PreHeader,
|
||||
MachineInstr *IndVar, MachineInstr &Cmp,
|
||||
SmallVectorImpl<MachineOperand> &Cond,
|
||||
SmallVectorImpl<MachineInstr *> &PrevInsts,
|
||||
unsigned Iter, unsigned MaxIter) const override;
|
||||
|
||||
/// Return true if it's profitable to predicate
|
||||
/// instructions with accumulated instruction latency of "NumCycles"
|
||||
|
@ -3930,92 +3930,21 @@ bool PPCInstrInfo::isBDNZ(unsigned Opcode) const {
|
||||
return (Opcode == (Subtarget.isPPC64() ? PPC::BDNZ8 : PPC::BDNZ));
|
||||
}
|
||||
|
||||
class PPCPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
|
||||
MachineInstr *Loop, *EndLoop, *LoopCount;
|
||||
MachineFunction *MF;
|
||||
const TargetInstrInfo *TII;
|
||||
|
||||
public:
|
||||
PPCPipelinerLoopInfo(MachineInstr *Loop, MachineInstr *EndLoop,
|
||||
MachineInstr *LoopCount)
|
||||
: Loop(Loop), EndLoop(EndLoop), LoopCount(LoopCount),
|
||||
MF(Loop->getParent()->getParent()),
|
||||
TII(MF->getSubtarget().getInstrInfo()) {}
|
||||
|
||||
bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
|
||||
// Only ignore the terminator.
|
||||
return MI == EndLoop;
|
||||
bool PPCInstrInfo::analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst,
|
||||
MachineInstr *&CmpInst) const {
|
||||
MachineBasicBlock *LoopEnd = L.getBottomBlock();
|
||||
MachineBasicBlock::iterator I = LoopEnd->getFirstTerminator();
|
||||
// We really "analyze" only CTR loops right now.
|
||||
if (I != LoopEnd->end() && isBDNZ(I->getOpcode())) {
|
||||
IndVarInst = nullptr;
|
||||
CmpInst = &*I;
|
||||
return false;
|
||||
}
|
||||
|
||||
Optional<bool>
|
||||
createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB,
|
||||
SmallVectorImpl<MachineOperand> &Cond) override {
|
||||
bool IsConstantTripCount =
|
||||
LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI;
|
||||
if (!IsConstantTripCount) {
|
||||
// Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1,
|
||||
// so we don't need to generate any thing here.
|
||||
Cond.push_back(MachineOperand::CreateImm(0));
|
||||
Cond.push_back(MachineOperand::CreateReg(
|
||||
MF->getSubtarget<PPCSubtarget>().isPPC64() ? PPC::CTR8 : PPC::CTR,
|
||||
true));
|
||||
return {};
|
||||
}
|
||||
|
||||
int64_t TripCount = LoopCount->getOperand(1).getImm();
|
||||
return TripCount > TC;
|
||||
}
|
||||
|
||||
void setPreheader(MachineBasicBlock *NewPreheader) override {
|
||||
// Do nothing. We want the LOOP setup instruction to stay in the *old*
|
||||
// preheader, so we can use BDZ in the prologs to adapt the loop trip count.
|
||||
}
|
||||
|
||||
void adjustTripCount(int TripCountAdjust) override {
|
||||
// If the loop trip count is a compile-time value, then just change the
|
||||
// value.
|
||||
if (LoopCount->getOpcode() == PPC::LI8 ||
|
||||
LoopCount->getOpcode() == PPC::LI) {
|
||||
int64_t TripCount = LoopCount->getOperand(1).getImm() + TripCountAdjust;
|
||||
LoopCount->getOperand(1).setImm(TripCount);
|
||||
return;
|
||||
}
|
||||
|
||||
// Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1,
|
||||
// so we don't need to generate any thing here.
|
||||
}
|
||||
|
||||
void disposed() override {
|
||||
Loop->eraseFromParent();
|
||||
// Ensure the loop setup instruction is deleted too.
|
||||
LoopCount->eraseFromParent();
|
||||
}
|
||||
};
|
||||
|
||||
std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
|
||||
PPCInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
|
||||
// We really "analyze" only hardware loops right now.
|
||||
MachineBasicBlock::iterator I = LoopBB->getFirstTerminator();
|
||||
MachineBasicBlock *Preheader = *LoopBB->pred_begin();
|
||||
if (Preheader == LoopBB)
|
||||
Preheader = *std::next(LoopBB->pred_begin());
|
||||
MachineFunction *MF = Preheader->getParent();
|
||||
|
||||
if (I != LoopBB->end() && isBDNZ(I->getOpcode())) {
|
||||
SmallPtrSet<MachineBasicBlock *, 8> Visited;
|
||||
if (MachineInstr *LoopInst = findLoopInstr(*Preheader, Visited)) {
|
||||
Register LoopCountReg = LoopInst->getOperand(0).getReg();
|
||||
MachineRegisterInfo &MRI = MF->getRegInfo();
|
||||
MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg);
|
||||
return std::make_unique<PPCPipelinerLoopInfo>(LoopInst, &*I, LoopCount);
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
return true;
|
||||
}
|
||||
|
||||
MachineInstr *PPCInstrInfo::findLoopInstr(
|
||||
MachineBasicBlock &PreHeader,
|
||||
SmallPtrSet<MachineBasicBlock *, 8> &Visited) const {
|
||||
MachineInstr *
|
||||
PPCInstrInfo::findLoopInstr(MachineBasicBlock &PreHeader) const {
|
||||
|
||||
unsigned LOOPi = (Subtarget.isPPC64() ? PPC::MTCTR8loop : PPC::MTCTRloop);
|
||||
|
||||
@ -4026,6 +3955,50 @@ MachineInstr *PPCInstrInfo::findLoopInstr(
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// Reduce the trip count of a CTR loop by one, or hand back the condition the
/// caller needs in order to test for the end of the loop.
///
/// \param MBB       Only used to reach the enclosing MachineFunction.
/// \param PreHeader Block passed to findLoopInstr to locate the loop setup
///                  instruction.
/// \param IndVar    Must be null (asserted).
/// \param Cmp       The loop-ending instruction; its opcode must satisfy
///                  isBDNZ (asserted).
/// \param Cond      In the run-time trip-count case, receives an immediate 0
///                  followed by a CTR8/CTR register operand (CTR8 when the
///                  subtarget is PPC64). Not modified on other paths.
/// \param PrevInsts Not referenced by this implementation.
/// \param Iter      Not referenced by this implementation.
/// \param MaxIter   Not referenced by this implementation.
/// \return 0 if the loop setup instruction or the unique definition of its
///         count register cannot be found, or if a constant count of one or
///         less caused the loop setup to be erased; Offset - 1 for a larger
///         constant count Offset; otherwise the loop-count register.
unsigned PPCInstrInfo::reduceLoopCount(
|
||||
MachineBasicBlock &MBB, MachineBasicBlock &PreHeader, MachineInstr *IndVar,
|
||||
MachineInstr &Cmp, SmallVectorImpl<MachineOperand> &Cond,
|
||||
SmallVectorImpl<MachineInstr *> &PrevInsts, unsigned Iter,
|
||||
unsigned MaxIter) const {
|
||||
// We expect a hardware loop currently. This means that IndVar is set
|
||||
// to null, and the compare is the BDNZ/BDNZ8 instruction.
|
||||
assert((!IndVar) && isBDNZ(Cmp.getOpcode()) && "Expecting a CTR loop");
|
||||
MachineFunction *MF = MBB.getParent();
|
||||
// NOTE(review): DL is not referenced again in this function.
DebugLoc DL = Cmp.getDebugLoc();
|
||||
MachineInstr *Loop = findLoopInstr(PreHeader);
|
||||
if (!Loop)
|
||||
return 0;
|
||||
// Operand 0 of the loop setup instruction is the register holding the trip
// count; find the single instruction that defines it.
Register LoopCountReg = Loop->getOperand(0).getReg();
|
||||
MachineRegisterInfo &MRI = MF->getRegInfo();
|
||||
MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg);
|
||||
|
||||
if (!LoopCount)
|
||||
return 0;
|
||||
// If the loop trip count is a compile-time value, then just change the
|
||||
// value.
|
||||
if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI) {
|
||||
int64_t Offset = LoopCount->getOperand(1).getImm();
|
||||
// A count of one or less leaves nothing to iterate: erase both the
// instruction that loads the count and the loop setup instruction.
if (Offset <= 1) {
|
||||
LoopCount->eraseFromParent();
|
||||
Loop->eraseFromParent();
|
||||
return 0;
|
||||
}
|
||||
LoopCount->getOperand(1).setImm(Offset - 1);
|
||||
return Offset - 1;
|
||||
}
|
||||
|
||||
// The loop trip count is a run-time value.
|
||||
// We need to subtract one from the trip count,
|
||||
// and insert a branch later to check if we're done with the loop.
|
||||
|
||||
// The BDZ/BDZ8 that we will insert also decreases the CTR by 1, so we
|
||||
// don't need to generate anything here.
|
||||
Cond.push_back(MachineOperand::CreateImm(0));
|
||||
// NOTE(review): the `true` argument is presumably CreateReg's isDef flag --
// confirm against the MachineOperand API.
Cond.push_back(MachineOperand::CreateReg(
|
||||
Subtarget.isPPC64() ? PPC::CTR8 : PPC::CTR, true));
|
||||
return LoopCountReg;
|
||||
}
|
||||
|
||||
// Return true if get the base operand, byte offset of an instruction and the
|
||||
// memory width. Width is the size of memory that is being loaded/stored.
|
||||
bool PPCInstrInfo::getMemOperandWithOffsetWidth(
|
||||
|
@ -486,14 +486,26 @@ public:
|
||||
/// On PPC, we have two instructions used to set-up the hardware loop
|
||||
/// (MTCTRloop, MTCTR8loop) with corresponding endloop (BDNZ, BDNZ8)
|
||||
/// instructions to indicate the end of a loop.
|
||||
MachineInstr *
|
||||
findLoopInstr(MachineBasicBlock &PreHeader,
|
||||
SmallPtrSet<MachineBasicBlock *, 8> &Visited) const;
|
||||
MachineInstr *findLoopInstr(MachineBasicBlock &PreHeader) const;
|
||||
|
||||
/// Analyze loop L, which must be a single-basic-block loop, and if the
|
||||
/// conditions can be understood enough produce a PipelinerLoopInfo object.
|
||||
std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
|
||||
analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override;
|
||||
/// Analyze the loop code to find the loop induction variable and compare used
|
||||
/// to compute the number of iterations. Currently, we analyze loops that are
|
||||
/// controlled using hardware loops. In this case, the induction variable
|
||||
/// instruction is null. For all other cases, this function returns true,
|
||||
/// which means we're unable to analyze it. \p IndVarInst and \p CmpInst will
|
||||
/// return new values when we can analyze the readonly loop \p L, otherwise,
|
||||
/// nothing is changed.
|
||||
bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst,
|
||||
MachineInstr *&CmpInst) const override;
|
||||
/// Generate code to reduce the loop iteration by one and check if the loop
|
||||
/// is finished. Return the value/register of the new loop count. We need
|
||||
/// this function when peeling off one or more iterations of a loop. This
|
||||
/// function assumes the last iteration is peeled first.
|
||||
unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineBasicBlock &PreHeader,
|
||||
MachineInstr *IndVar, MachineInstr &Cmp,
|
||||
SmallVectorImpl<MachineOperand> &Cond,
|
||||
SmallVectorImpl<MachineInstr *> &PrevInsts,
|
||||
unsigned Iter, unsigned MaxIter) const override;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -7,8 +7,8 @@
|
||||
|
||||
; CHECK: if ({{.*}}) jump
|
||||
; CHECK: [[VREG:v([0-9]+)]]{{.*}} = {{.*}}vmem(r{{[0-9]+}}++#1)
|
||||
; CHECK: if ({{.*}}) {{jump|jump:nt|jump:t}} [[EPLOG1:(.*)]]
|
||||
; CHECK: if ({{.*}}) {{jump|jump:nt|jump:t}} [[EPLOG:(.*)]]
|
||||
; CHECK: if ({{.*}}) {{jump|jump:nt}} [[EPLOG1:(.*)]]
|
||||
; CHECK: if ({{.*}}) {{jump|jump:nt}} [[EPLOG:(.*)]]
|
||||
; CHECK: [[EPLOG]]:
|
||||
; CHECK: [[VREG1:v([0-9]+)]] = [[VREG]]
|
||||
; CHECK: [[VREG]] = v{{[0-9]+}}
|
||||
|
Loading…
x
Reference in New Issue
Block a user