diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index b2d7ca11a2d..0c07fd33405 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -15,12 +15,221 @@ #include "PPCHazardRecognizers.h" #include "PPC.h" #include "PPCInstrInfo.h" +#include "PPCTargetMachine.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +bool PPCDispatchGroupSBHazardRecognizer::isLoadAfterStore(SUnit *SU) { + // FIXME: Move this. + if (isBCTRAfterSet(SU)) + return true; + + const MCInstrDesc *MCID = DAG->getInstrDesc(SU); + if (!MCID) + return false; + + if (!MCID->mayLoad()) + return false; + + // SU is a load; for any predecessors in this dispatch group, that are stores, + // and with which we have an ordering dependency, return true. + for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) { + const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit()); + if (!PredMCID || !PredMCID->mayStore()) + continue; + + if (!SU->Preds[i].isNormalMemory() && !SU->Preds[i].isBarrier()) + continue; + + for (unsigned j = 0, je = CurGroup.size(); j != je; ++j) + if (SU->Preds[i].getSUnit() == CurGroup[j]) + return true; + } + + return false; +} + +bool PPCDispatchGroupSBHazardRecognizer::isBCTRAfterSet(SUnit *SU) { + const MCInstrDesc *MCID = DAG->getInstrDesc(SU); + if (!MCID) + return false; + + if (!MCID->isBranch()) + return false; + + // SU is a branch; for any predecessors in this dispatch group, with which we + // have a data dependence and set the counter register, return true. + for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) { + const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit()); + if (!PredMCID || PredMCID->getSchedClass() != PPC::Sched::IIC_SprMTSPR) + continue; + + if (SU->Preds[i].isCtrl()) + continue; + + for (unsigned j = 0, je = CurGroup.size(); j != je; ++j) + if (SU->Preds[i].getSUnit() == CurGroup[j]) + return true; + } + + return false; +} + +// FIXME: Remove this when we don't need this: +namespace llvm { namespace PPC { extern int getNonRecordFormOpcode(uint16_t); } } + +// FIXME: A lot of code in PPCDispatchGroupSBHazardRecognizer is P7 specific. + +bool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID, + unsigned &NSlots) { + // FIXME: Indirectly, this information is contained in the itinerary, and + // we should derive it from there instead of separately specifying it + // here. + unsigned IIC = MCID->getSchedClass(); + switch (IIC) { + default: + NSlots = 1; + break; + case PPC::Sched::IIC_IntDivW: + case PPC::Sched::IIC_IntDivD: + case PPC::Sched::IIC_LdStLoadUpd: + case PPC::Sched::IIC_LdStLDU: + case PPC::Sched::IIC_LdStLFDU: + case PPC::Sched::IIC_LdStLFDUX: + case PPC::Sched::IIC_LdStLHA: + case PPC::Sched::IIC_LdStLHAU: + case PPC::Sched::IIC_LdStLWA: + case PPC::Sched::IIC_LdStSTDU: + case PPC::Sched::IIC_LdStSTFDU: + NSlots = 2; + break; + case PPC::Sched::IIC_LdStLoadUpdX: + case PPC::Sched::IIC_LdStLDUX: + case PPC::Sched::IIC_LdStLHAUX: + case PPC::Sched::IIC_LdStLWARX: + case PPC::Sched::IIC_LdStLDARX: + case PPC::Sched::IIC_LdStSTDUX: + case PPC::Sched::IIC_LdStSTDCX: + case PPC::Sched::IIC_LdStSTWCX: + case PPC::Sched::IIC_BrMCRX: // mtcr + // FIXME: Add sync/isync (here and in the itinerary). + NSlots = 4; + break; + } + + // FIXME: record-form instructions need a different itinerary class. + if (NSlots == 1 && PPC::getNonRecordFormOpcode(MCID->getOpcode()) != -1) + NSlots = 2; + + switch (IIC) { + default: + // All multi-slot instructions must come first. + return NSlots > 1; + case PPC::Sched::IIC_SprMFCR: + case PPC::Sched::IIC_SprMFCRF: + case PPC::Sched::IIC_SprMTSPR: + return true; + } +} + +ScheduleHazardRecognizer::HazardType +PPCDispatchGroupSBHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { + if (Stalls == 0 && isLoadAfterStore(SU)) + return NoopHazard; + + return ScoreboardHazardRecognizer::getHazardType(SU, Stalls); +} + +bool PPCDispatchGroupSBHazardRecognizer::ShouldPreferAnother(SUnit *SU) { + const MCInstrDesc *MCID = DAG->getInstrDesc(SU); + unsigned NSlots; + if (MCID && mustComeFirst(MCID, NSlots) && CurSlots) + return true; + + return ScoreboardHazardRecognizer::ShouldPreferAnother(SU); +} + +unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) { + // We only need to fill out a maximum of 5 slots here: The 6th slot could + // only be a second branch, and otherwise the next instruction will start a + // new group. + if (isLoadAfterStore(SU) && CurSlots < 6) { + unsigned Directive = + DAG->TM.getSubtarget().getDarwinDirective(); + // If we're using a special group-terminating nop, then we need only one. + if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7) + return 1; + + return 5 - CurSlots; + } + + return ScoreboardHazardRecognizer::PreEmitNoops(SU); +} + +void PPCDispatchGroupSBHazardRecognizer::EmitInstruction(SUnit *SU) { + const MCInstrDesc *MCID = DAG->getInstrDesc(SU); + if (MCID) { + if (CurSlots == 5 || (MCID->isBranch() && CurBranches == 1)) { + CurGroup.clear(); + CurSlots = CurBranches = 0; + } else { + DEBUG(dbgs() << "**** Adding to dispatch group: SU(" << + SU->NodeNum << "): "); + DEBUG(DAG->dumpNode(SU)); + + unsigned NSlots; + bool MustBeFirst = mustComeFirst(MCID, NSlots); + + // If this instruction must come first, but does not, then it starts a + // new group. + if (MustBeFirst && CurSlots) { + CurSlots = CurBranches = 0; + CurGroup.clear(); + } + + CurSlots += NSlots; + CurGroup.push_back(SU); + + if (MCID->isBranch()) + ++CurBranches; + } + } + + return ScoreboardHazardRecognizer::EmitInstruction(SU); +} + +void PPCDispatchGroupSBHazardRecognizer::AdvanceCycle() { + return ScoreboardHazardRecognizer::AdvanceCycle(); +} + +void PPCDispatchGroupSBHazardRecognizer::RecedeCycle() { + llvm_unreachable("Bottom-up scheduling not supported"); +} + +void PPCDispatchGroupSBHazardRecognizer::Reset() { + CurGroup.clear(); + CurSlots = CurBranches = 0; + return ScoreboardHazardRecognizer::Reset(); +} + +void PPCDispatchGroupSBHazardRecognizer::EmitNoop() { + unsigned Directive = + DAG->TM.getSubtarget().getDarwinDirective(); + // If the group has now filled all of its slots, or if we're using a special + // group-terminating nop, the group is complete. + if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 || + CurSlots == 6) { + CurGroup.clear(); + CurSlots = CurBranches = 0; + } else { + CurGroup.push_back(0); + ++CurSlots; + } +} + //===----------------------------------------------------------------------===// // PowerPC 970 Hazard Recognizer // diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h index 9ee042b63e1..6b7fe41e574 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.h +++ b/lib/Target/PowerPC/PPCHazardRecognizers.h @@ -21,6 +21,32 @@ namespace llvm { +/// PPCDispatchGroupSBHazardRecognizer - This class implements a scoreboard-based +/// hazard recognizer for PPC ooo processors with dispatch-group hazards. +class PPCDispatchGroupSBHazardRecognizer : public ScoreboardHazardRecognizer { + const ScheduleDAG *DAG; + SmallVector CurGroup; + unsigned CurSlots, CurBranches; + + bool isLoadAfterStore(SUnit *SU); + bool isBCTRAfterSet(SUnit *SU); + bool mustComeFirst(const MCInstrDesc *MCID, unsigned &NSlots); +public: + PPCDispatchGroupSBHazardRecognizer(const InstrItineraryData *ItinData, + const ScheduleDAG *DAG_) : + ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_), + CurSlots(0), CurBranches(0) {} + + virtual HazardType getHazardType(SUnit *SU, int Stalls); + virtual bool ShouldPreferAnother(SUnit* SU); + virtual unsigned PreEmitNoops(SUnit *SU); + virtual void EmitInstruction(SUnit *SU); + virtual void AdvanceCycle(); + virtual void RecedeCycle(); + virtual void Reset(); + virtual void EmitNoop(); +}; + /// PPCHazardRecognizer970 - This class defines a finite state automata that /// models the dispatch logic on the PowerPC 970 (aka G5) processor. This /// promotes good dispatch group formation and implements noop insertion to diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index 6b20a41ef22..c5e179fec25 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -258,6 +258,15 @@ class DForm_4_zero opcode, dag OOL, dag IOL, string asmstr, let Addr = 0; } +class DForm_4_fixedreg_zero opcode, bits<5> R, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, + list pattern> + : DForm_4 { + let A = R; + let B = R; + let C = 0; +} + class IForm_and_DForm_1 opcode1, bit aa, bit lk, bits<6> opcode2, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list pattern> diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index c4582c540ba..bd3b4924ccc 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -74,6 +74,9 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer( const ScheduleDAG *DAG) const { unsigned Directive = TM.getSubtarget().getDarwinDirective(); + if (Directive == PPC::DIR_PWR7) + return new PPCDispatchGroupSBHazardRecognizer(II, DAG); + // Most subtargets use a PPC970 recognizer. if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 && Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) { @@ -85,6 +88,56 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer( return new ScoreboardHazardRecognizer(II, DAG); } + +int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, + unsigned UseIdx) const { + int Latency = PPCGenInstrInfo::getOperandLatency(ItinData, DefMI, DefIdx, + UseMI, UseIdx); + + const MachineOperand &DefMO = DefMI->getOperand(DefIdx); + unsigned Reg = DefMO.getReg(); + + const TargetRegisterInfo *TRI = &getRegisterInfo(); + bool IsRegCR; + if (TRI->isVirtualRegister(Reg)) { + const MachineRegisterInfo *MRI = + &DefMI->getParent()->getParent()->getRegInfo(); + IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) || + MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRBITRCRegClass); + } else { + IsRegCR = PPC::CRRCRegClass.contains(Reg) || + PPC::CRBITRCRegClass.contains(Reg); + } + + if (UseMI->isBranch() && IsRegCR) { + if (Latency < 0) + Latency = getInstrLatency(ItinData, DefMI); + + // On some cores, there is an additional delay between writing to a condition + // register, and using it from a branch. + unsigned Directive = TM.getSubtarget().getDarwinDirective(); + switch (Directive) { + default: break; + case PPC::DIR_7400: + case PPC::DIR_750: + case PPC::DIR_970: + case PPC::DIR_E5500: + case PPC::DIR_PWR4: + case PPC::DIR_PWR5: + case PPC::DIR_PWR5X: + case PPC::DIR_PWR6: + case PPC::DIR_PWR6X: + case PPC::DIR_PWR7: + Latency += 2; + break; + } + } + + return Latency; +} + // Detect 32 -> 64-bit extensions where we may reuse the low sub-register. bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, @@ -218,10 +271,19 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { - DebugLoc DL; - BuildMI(MBB, MI, DL, get(PPC::NOP)); -} + // This function is used for scheduling, and the nop wanted here is the type + // that terminates dispatch groups on the POWER cores. + unsigned Directive = TM.getSubtarget().getDarwinDirective(); + unsigned Opcode; + switch (Directive) { + default: Opcode = PPC::NOP; break; + case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break; + case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break; + } + DebugLoc DL; + BuildMI(MBB, MI, DL, get(Opcode)); +} // Branch analysis. // Note: If the condition register is set to CTR or CTR8 then this is a diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index f140c41a2a8..7876703584d 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -95,6 +95,18 @@ public: CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const; + virtual + int getOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, unsigned UseIdx) const; + virtual + int getOperandLatency(const InstrItineraryData *ItinData, + SDNode *DefNode, unsigned DefIdx, + SDNode *UseNode, unsigned UseIdx) const { + return PPCGenInstrInfo::getOperandLatency(ItinData, DefNode, DefIdx, + UseNode, UseIdx); + } + bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, unsigned &SubIdx) const; diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 8754b40e09f..11c296dd336 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -1616,8 +1616,17 @@ def XORI : DForm_4<26, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), def XORIS : DForm_4<27, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), "xoris $dst, $src1, $src2", IIC_IntSimple, [(set i32:$dst, (xor i32:$src1, imm16ShiftedZExt:$src2))]>; + def NOP : DForm_4_zero<24, (outs), (ins), "nop", IIC_IntSimple, []>; +let isCodeGenOnly = 1 in { +// The POWER6 and POWER7 have special group-terminating nops. +def NOP_GT_PWR6 : DForm_4_fixedreg_zero<24, 1, (outs), (ins), + "ori 1, 1, 0", IIC_IntSimple, []>; +def NOP_GT_PWR7 : DForm_4_fixedreg_zero<24, 2, (outs), (ins), + "ori 2, 2, 0", IIC_IntSimple, []>; +} + let isCompare = 1, neverHasSideEffects = 1 in { def CMPWI : DForm_5_ext<11, (outs crrc:$crD), (ins gprc:$rA, s16imm:$imm), "cmpwi $crD, $rA, $imm", IIC_IntCompare>; diff --git a/lib/Target/PowerPC/PPCScheduleP7.td b/lib/Target/PowerPC/PPCScheduleP7.td index 95b5a8b2c65..958bc90f674 100644 --- a/lib/Target/PowerPC/PPCScheduleP7.td +++ b/lib/Target/PowerPC/PPCScheduleP7.td @@ -93,6 +93,7 @@ def P7Itineraries : ProcessorItineraries< P7_DU3, P7_DU4], 0>, InstrStage<1, [P7_FX1, P7_FX2]>], [1, 1, 1]>, + // FIXME: Add record-form itinerary data. InstrItinData, InstrStage<1, [P7_DU2], 0>, InstrStage<36, [P7_FX1, P7_FX2]>], @@ -290,7 +291,10 @@ def P7Itineraries : ProcessorItineraries< InstrStage<1, [P7_DU4], 0>, InstrStage<1, [P7_LS1, P7_LS2]>], [1, 1, 1]>, - InstrItinData, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, InstrStage<1, [P7_CRU]>, InstrStage<1, [P7_FX1, P7_FX2]>], [3, 1]>, // mtcr @@ -300,6 +304,9 @@ def P7Itineraries : ProcessorItineraries< InstrItinData, InstrStage<1, [P7_CRU]>], [3, 1]>, + InstrItinData, + InstrStage<1, [P7_FX1]>], + [4, 1]>, // mtctr InstrItinData, InstrStage<1, [P7_VS1, P7_VS2]>], diff --git a/test/CodeGen/PowerPC/pwr7-gt-nop.ll b/test/CodeGen/PowerPC/pwr7-gt-nop.ll new file mode 100644 index 00000000000..8c8545d60df --- /dev/null +++ b/test/CodeGen/PowerPC/pwr7-gt-nop.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -mcpu=pwr7 | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +; Function Attrs: nounwind +define void @foo(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c, float* nocapture %d) #0 { + +; CHECK-LABEL: @foo + +entry: + %0 = load float* %b, align 4 + store float %0, float* %a, align 4 + %1 = load float* %c, align 4 + store float %1, float* %b, align 4 + %2 = load float* %a, align 4 + store float %2, float* %d, align 4 + ret void + +; CHECK: lfs [[REG1:[0-9]+]], 0(4) +; CHECK: stfs [[REG1]], 0(3) +; CHECK: ori 2, 2, 0 +; CHECK: lfs [[REG2:[0-9]+]], 0(5) +; CHECK: stfs [[REG2]], 0(4) +; CHECK: ori 2, 2, 0 +; CHECK: lfs [[REG3:[0-9]+]], 0(3) +; CHECK: stfs [[REG3]], 0(6) +; CHECK: blr +} + +attributes #0 = { nounwind } +