llvm-mirror/tools/llvm-mca/Dispatch.h

//===----------------------- Dispatch.h -------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements classes that are used to model register files,
/// reorder buffers and the hardware dispatch logic.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_TOOLS_LLVM_MCA_DISPATCH_H
#define LLVM_TOOLS_LLVM_MCA_DISPATCH_H

#include "Instruction.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include <map>

namespace mca {

// Forward declarations for types that this header only names through
// pointers or references.
class WriteState;
class DispatchUnit;
class Scheduler;
class Backend;

/// \brief Keeps track of register definitions.
///
/// This class tracks register definitions, and performs register renaming
/// to break anti-dependencies.
/// By default, there is no limit on the number of register aliases which
/// can be created for the purpose of register renaming. However, users can
/// specify at object construction time a limit on the number of temporary
/// registers which can be used by the register renaming logic.
class RegisterFile {
  // Register information of the target. The number of registers it reports
  // determines how many entries `RegisterMappings` has.
  const llvm::MCRegisterInfo &MRI;

  // Statistics only: number of mappings currently in use, and the maximum
  // number of mappings that were in use.
  unsigned NumUsedMappings = 0;
  unsigned MaxUsedMappings = 0;
  // Statistics only: total number of mappings created over time.
  unsigned TotalMappingsCreated = 0;

  // Upper bound on the number of register aliases which can be used by the
  // register renamer. The default value for this field is zero, which means
  // that there is no limit on the number of register mappings which can be
  // created. That is equivalent to having a theoretically infinite number of
  // temporary registers.
  unsigned TotalMappings;

  // This table contains an entry for every physical register; a register
  // index is the key used to access a WriteState.
  // This is how RAW dependencies are tracked for dispatched instructions.
  // Only the last seen write is tracked for every register, which assumes
  // that all writes fully update both super and sub registers.
  // A flag in MCInstrDesc is needed to check if a write also updates super
  // registers; an extra tablegen flag could then be set for instructions.
  // That is a separate patch on its own.
  std::vector<WriteState *> RegisterMappings;
  // Assumptions are:
  //  a) a false dependency is always removed by the register renamer.
  //  b) the register renamer can create an "infinite" number of mappings.
  // Since the number of created mappings is tracked, constraints on the
  // number of mappings that can be created may be introduced in future.
  // For example, the maximum number of registers that are available for
  // register renaming purposes may default to the size of the register file.

  // In future, this design can be extended to allow multiple register files,
  // and to apply different restrictions on the register mappings and on the
  // number of temporary registers used by mappings.

public:
  // Builds a register file with one (initially null) mapping slot for every
  // register known to `mri`. `Mappings` caps the number of aliases available
  // for renaming; zero means "no limit".
  RegisterFile(const llvm::MCRegisterInfo &mri, unsigned Mappings = 0)
      : MRI(mri), TotalMappings(Mappings),
        RegisterMappings(mri.getNumRegs(), nullptr) {}

  // Creates a new register mapping for the register written by WS.
  // This reserves a temporary register in the register file.
  void addRegisterMapping(WriteState &WS);

  // Invalidates register mappings associated to the input WriteState object.
  // This releases temporary registers in the register file.
  void invalidateRegisterMapping(const WriteState &WS);

  // NOTE(review): presumably tells whether NumRegWrites new mappings can be
  // created under the `TotalMappings` limit; the definition is not part of
  // this header, so confirm against the implementation.
  bool isAvailable(unsigned NumRegWrites);

  // NOTE(review): presumably appends to `Writes` the tracked write(s) of
  // RegID; confirm against the implementation.
  void collectWrites(llvm::SmallVectorImpl<WriteState *> &Writes,
                     unsigned RegID) const;

  // NOTE(review): presumably updates RS with the dependencies on the tracked
  // write(s) of RegID; confirm against the implementation.
  void updateOnRead(ReadState &RS, unsigned RegID);

  // Statistics accessors.
  unsigned getMaxUsedRegisterMappings() const { return MaxUsedMappings; }
  unsigned getTotalRegisterMappingsCreated() const {
    return TotalMappingsCreated;
  }

#ifndef NDEBUG
  void dump() const;
#endif
};

/// \brief Tracks which instructions are in-flight (i.e. dispatched but not
/// retired) in the OoO backend.
///
/// This class checks on every cycle if/which instructions can be retired.
/// Instructions are retired in program order.
/// When an instruction is retired, the DispatchUnit object that owns
/// this RetireControlUnit gets notified.
/// At that point, register updates are all architecturally committed, and
/// any temporary registers originally allocated for the retired instruction
/// are freed.
struct RetireControlUnit {
  // The retire unit creates a "token" (an object of class RUToken) for every
  // instruction dispatched to the schedulers. Flag 'Executed' is used to
  // quickly check if an instruction has reached the write-back stage. A token
  // also carries information related to the number of entries consumed by the
  // instruction in the reorder buffer. The idea is that those entries will
  // become available again once the instruction is retired. On every cycle,
  // the RCU (Retire Control Unit) scans the tokens to search for instructions
  // that are ready to be retired. Instructions are retired in program order,
  // and only 'Executed' instructions are eligible for retire.
  // Note that the size of the reorder buffer is defined by the scheduling
  // model via field 'NumMicroOpBufferSize'.
  struct RUToken {
    unsigned Index;    // Instruction index.
    unsigned NumSlots; // Slots reserved to this instruction.
    bool Executed;     // True if the instruction is past the WB stage.
  };

private:
  unsigned NextAvailableSlotIdx = 0;
  unsigned CurrentInstructionSlotIdx = 0;
  unsigned AvailableSlots;
  unsigned MaxRetirePerCycle; // 0 means no limit.
  std::vector<RUToken> Queue;
  DispatchUnit *Owner;

public:
  // Creates a retire unit with a reorder buffer of `NumSlots` entries (all of
  // them initially available). `RPC` is the maximum number of instructions
  // retired per cycle (zero means no limit), and `DU` is the dispatch unit
  // that owns this object.
  RetireControlUnit(unsigned NumSlots, unsigned RPC, DispatchUnit *DU)
      : AvailableSlots(NumSlots), MaxRetirePerCycle(RPC), Queue(NumSlots),
        Owner(DU) {
    assert(NumSlots && "Expected at least one slot!");
  }

  // True if no slot is left in the reorder buffer.
  bool isFull() const { return AvailableSlots == 0; }

  // True if every slot of the reorder buffer is available.
  bool isEmpty() const { return Queue.size() == AvailableSlots; }

  // True if at least `Quantity` slots are available.
  bool isAvailable(unsigned Quantity = 1) const {
    // Some instructions may declare a number of uOps which exceeds the size
    // of the reorder buffer. To avoid problems, the number of requested slots
    // is capped to the size of the reorder buffer.
    const unsigned BufferSize = static_cast<unsigned>(Queue.size());
    const unsigned Needed = Quantity < BufferSize ? Quantity : BufferSize;
    return Needed <= AvailableSlots;
  }

  // Reserves a number of slots, and returns a new token.
  unsigned reserveSlot(unsigned Index, unsigned NumMicroOps);

  /// Retires instructions in program order.
  void cycleEvent();

  // NOTE(review): presumably marks the token identified by TokenID as
  // 'Executed'; the definition is not part of this header.
  void onInstructionExecuted(unsigned TokenID);

#ifndef NDEBUG
  void dump() const;
#endif
};

/// \brief Implements the hardware dispatch logic.
///
/// This class is responsible for the dispatch stage, in which instructions are
/// dispatched in groups to the Scheduler.  An instruction can be dispatched if
/// functional units are available.
/// To be more specific, an instruction can be dispatched to the Scheduler if:
///  1) There are enough entries in the reorder buffer (implemented by class
///     RetireControlUnit) to accommodate all opcodes.
///  2) There are enough temporaries to rename output register operands.
///  3) There are enough entries available in the used buffered resource(s).
///
/// The number of micro opcodes that can be dispatched in one cycle is limited
/// by the value of field 'DispatchWidth'. A "dynamic dispatch stall" occurs
/// when processor resources are not available (i.e. at least one of the
/// abovementioned checks fails). Dispatch stall events are counted during the
/// entire execution of the code, and displayed by the performance report when
/// flag '-verbose' is specified.
///
/// If the number of micro opcodes of an instruction is bigger than
/// DispatchWidth, then it can only be dispatched at the beginning of one cycle.
/// The DispatchUnit will still have to wait for a number of cycles (depending
/// on the DispatchWidth and the number of micro opcodes) before it can serve
/// other instructions.
class DispatchUnit {
  // Maximum number of micro opcodes that can be dispatched in one cycle.
  unsigned DispatchWidth;
  // Dispatch entries still available in the current cycle.
  unsigned AvailableEntries;
  // Entries that are already consumed at the start of the upcoming cycle(s);
  // see cycleEvent().
  unsigned CarryOver = 0;
  Scheduler *SC;

  std::unique_ptr<RegisterFile> RAT;
  std::unique_ptr<RetireControlUnit> RCU;
  Backend *Owner;

  /// Identifiers of dispatch stall events.
  ///
  /// The naming convention is:
  /// * Event names start with the "DS_" prefix.
  /// * For dynamic dispatch stalls, the "DS_" prefix is followed by the
  ///   acronym of the unavailable resource/functional unit (example: RAT).
  /// * The last substring is the event reason (example: REG_UNAVAILABLE means
  ///   that register renaming couldn't find enough spare registers in the
  ///   register file).
  ///
  /// List of acronyms used for processor resources:
  /// RAT - Register Alias Table (used by the register renaming logic)
  /// RCU - Retire Control Unit
  /// SQ  - Scheduler's Queue
  /// LDQ - Load Queue
  /// STQ - Store Queue
  enum {
    DS_RAT_REG_UNAVAILABLE,
    DS_RCU_TOKEN_UNAVAILABLE,
    DS_SQ_TOKEN_UNAVAILABLE,
    DS_LDQ_TOKEN_UNAVAILABLE,
    DS_STQ_TOKEN_UNAVAILABLE,
    DS_DISPATCH_GROUP_RESTRICTION,
    DS_LAST
  };

  // The DispatchUnit tracks dispatch stall events caused by the
  // unavailability of hardware resources. Events are classified based on the
  // stall kind, so there is a counter for every source of dispatch stall.
  // Counters are stored into vector `DispatchStalls`, which is always of size
  // DS_LAST.
  std::vector<unsigned> DispatchStalls;

  // Per-resource dispatch checks used by canDispatch(); their definitions are
  // not part of this header.
  bool checkRAT(const InstrDesc &Desc);
  bool checkRCU(const InstrDesc &Desc);
  bool checkScheduler(const InstrDesc &Desc);

  void updateRAWDependencies(ReadState &RS, const llvm::MCSubtargetInfo &STI);
  void notifyInstructionDispatched(unsigned IID);

public:
  // Builds a dispatch unit owned by backend `B` which dispatches to `Sched`.
  // A register file (limited to `RegisterFileSize` mappings) and a retire
  // control unit (with `MicroOpBufferSize` slots, retiring at most
  // `MaxRetirePerCycle` instructions per cycle) are created and owned by this
  // object. All `MaxDispatchWidth` dispatch entries are initially available.
  DispatchUnit(Backend *B, const llvm::MCRegisterInfo &MRI,
               unsigned MicroOpBufferSize, unsigned RegisterFileSize,
               unsigned MaxRetirePerCycle, unsigned MaxDispatchWidth,
               Scheduler *Sched)
      : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth),
        SC(Sched), RAT(llvm::make_unique<RegisterFile>(MRI, RegisterFileSize)),
        RCU(llvm::make_unique<RetireControlUnit>(MicroOpBufferSize,
                                                 MaxRetirePerCycle, this)),
        Owner(B), DispatchStalls(DS_LAST, 0) {}

  unsigned getDispatchWidth() const { return DispatchWidth; }

  // True if `NumEntries` dispatch entries can be consumed in this cycle.
  bool isAvailable(unsigned NumEntries) const {
    // When no entry has been consumed yet in this cycle, any request is
    // accepted, including requests bigger than the dispatch width.
    if (AvailableEntries == DispatchWidth)
      return true;
    return NumEntries <= AvailableEntries;
  }

  bool isRCUEmpty() const { return RCU->isEmpty(); }

  // True if the retire control unit, the register file and the scheduler can
  // all accept an instruction described by `Desc`. Checks are performed in
  // that order, and the first failing check ends the query.
  bool canDispatch(const InstrDesc &Desc) {
    assert(isAvailable(Desc.NumMicroOps));
    if (!checkRCU(Desc))
      return false;
    if (!checkRAT(Desc))
      return false;
    return checkScheduler(Desc);
  }

  unsigned dispatch(unsigned IID, Instruction *NewInst,
                    const llvm::MCSubtargetInfo &STI);

  // Forwards the query to the register file.
  void collectWrites(llvm::SmallVectorImpl<WriteState *> &Vec,
                     unsigned RegID) const {
    RAT->collectWrites(Vec, RegID);
  }

  // Accessors for the dispatch stall counters; there is one per stall kind.
  unsigned getNumRATStalls() const {
    return DispatchStalls[DS_RAT_REG_UNAVAILABLE];
  }
  unsigned getNumRCUStalls() const {
    return DispatchStalls[DS_RCU_TOKEN_UNAVAILABLE];
  }
  unsigned getNumSQStalls() const {
    return DispatchStalls[DS_SQ_TOKEN_UNAVAILABLE];
  }
  unsigned getNumLDQStalls() const {
    return DispatchStalls[DS_LDQ_TOKEN_UNAVAILABLE];
  }
  unsigned getNumSTQStalls() const {
    return DispatchStalls[DS_STQ_TOKEN_UNAVAILABLE];
  }
  unsigned getNumDispatchGroupStalls() const {
    return DispatchStalls[DS_DISPATCH_GROUP_RESTRICTION];
  }

  // Register file statistics, forwarded from the register file.
  unsigned getMaxUsedRegisterMappings() const {
    return RAT->getMaxUsedRegisterMappings();
  }
  unsigned getTotalRegisterMappingsCreated() const {
    return RAT->getTotalRegisterMappingsCreated();
  }
  void addNewRegisterMapping(WriteState &WS) { RAT->addRegisterMapping(WS); }

  // Starts a new cycle: the retire control unit is notified first, then the
  // dispatch entries for this cycle are recomputed. Entries carried over from
  // previous cycles are subtracted from the dispatch width; whatever doesn't
  // fit in this cycle is carried over again.
  void cycleEvent(unsigned Cycle) {
    RCU->cycleEvent();
    if (CarryOver >= DispatchWidth) {
      AvailableEntries = 0;
      CarryOver -= DispatchWidth;
    } else {
      AvailableEntries = DispatchWidth - CarryOver;
      CarryOver = 0U;
    }
  }

  void notifyInstructionRetired(unsigned Index);

  // Forwards the event to the retire control unit.
  void onInstructionExecuted(unsigned TokenID) {
    RCU->onInstructionExecuted(TokenID);
  }

  void invalidateRegisterMappings(const Instruction &Inst);
#ifndef NDEBUG
  void dump() const;
#endif
};

} // namespace mca

#endif