llvm-mirror/tools/llvm-mca/Views/TimelineView.cpp

//===--------------------- TimelineView.cpp ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \brief
///
/// This file implements the TimelineView interface.
///
//===----------------------------------------------------------------------===//

#include "Views/TimelineView.h"
#include <numeric>

namespace llvm {
namespace mca {

TimelineView::TimelineView(const MCSubtargetInfo &sti, MCInstPrinter &Printer,
                           llvm::ArrayRef<llvm::MCInst> S, unsigned Iterations,
                           unsigned Cycles)
    : InstructionView(sti, Printer, S), CurrentCycle(0),
      MaxCycle(Cycles == 0 ? std::numeric_limits<unsigned>::max() : Cycles),
      LastCycle(0), WaitTime(S.size()), UsedBuffer(S.size()) {
  unsigned NumInstructions = getSource().size();
  assert(Iterations && "Invalid number of iterations specified!");
  NumInstructions *= Iterations;
  Timeline.resize(NumInstructions);
  TimelineViewEntry InvalidTVEntry = {-1, 0, 0, 0, 0};
  std::fill(Timeline.begin(), Timeline.end(), InvalidTVEntry);

  WaitTimeEntry NullWTEntry = {0, 0, 0};
  std::fill(WaitTime.begin(), WaitTime.end(), NullWTEntry);

  std::pair<unsigned, int> NullUsedBufferEntry = {/* Invalid resource ID*/ 0,
                                                  /* unknown buffer size */ -1};
  std::fill(UsedBuffer.begin(), UsedBuffer.end(), NullUsedBufferEntry);
}

void TimelineView::onReservedBuffers(const InstRef &IR,
                                     ArrayRef<unsigned> Buffers) {
  if (IR.getSourceIndex() >= getSource().size())
    return;

  const MCSchedModel &SM = getSubTargetInfo().getSchedModel();
  std::pair<unsigned, int> BufferInfo = {0, -1};
  for (const unsigned Buffer : Buffers) {
    const MCProcResourceDesc &MCDesc = *SM.getProcResource(Buffer);
    if (!BufferInfo.first || BufferInfo.second > MCDesc.BufferSize) {
      BufferInfo.first = Buffer;
      BufferInfo.second = MCDesc.BufferSize;
    }
  }

  UsedBuffer[IR.getSourceIndex()] = BufferInfo;
}

void TimelineView::onEvent(const HWInstructionEvent &Event) {
  const unsigned Index = Event.IR.getSourceIndex();
  if (Index >= Timeline.size())
    return;

  switch (Event.Type) {
  case HWInstructionEvent::Retired: {
    TimelineViewEntry &TVEntry = Timeline[Index];
    if (CurrentCycle < MaxCycle)
      TVEntry.CycleRetired = CurrentCycle;

    // Update the WaitTime entry which corresponds to this Index.
    assert(TVEntry.CycleDispatched >= 0 && "Invalid TVEntry found!");
    unsigned CycleDispatched = static_cast<unsigned>(TVEntry.CycleDispatched);
    WaitTimeEntry &WTEntry = WaitTime[Index % getSource().size()];
    WTEntry.CyclesSpentInSchedulerQueue +=
        TVEntry.CycleIssued - CycleDispatched;
    assert(CycleDispatched <= TVEntry.CycleReady &&
           "Instruction cannot be ready if it hasn't been dispatched yet!");
    WTEntry.CyclesSpentInSQWhileReady +=
        TVEntry.CycleIssued - TVEntry.CycleReady;
    if (CurrentCycle > TVEntry.CycleExecuted) {
      WTEntry.CyclesSpentAfterWBAndBeforeRetire +=
          (CurrentCycle - 1) - TVEntry.CycleExecuted;
    }
    break;
  }
  case HWInstructionEvent::Ready:
    Timeline[Index].CycleReady = CurrentCycle;
    break;
  case HWInstructionEvent::Issued:
    Timeline[Index].CycleIssued = CurrentCycle;
    break;
  case HWInstructionEvent::Executed:
    Timeline[Index].CycleExecuted = CurrentCycle;
    break;
  case HWInstructionEvent::Dispatched:
    // There may be multiple dispatch events. Microcoded instructions that are
    // expanded into multiple uOps may require multiple dispatch cycles. Here,
    // we want to capture the first dispatch cycle.
    if (Timeline[Index].CycleDispatched == -1)
      Timeline[Index].CycleDispatched = static_cast<int>(CurrentCycle);
    break;
  default:
    return;
  }
  if (CurrentCycle < MaxCycle)
    LastCycle = std::max(LastCycle, CurrentCycle);
}

static raw_ostream::Colors chooseColor(unsigned CumulativeCycles,
                                       unsigned Executions, int BufferSize) {
  if (CumulativeCycles && BufferSize < 0)
    return raw_ostream::MAGENTA;
  unsigned Size = static_cast<unsigned>(BufferSize);
  if (CumulativeCycles >= Size * Executions)
    return raw_ostream::RED;
  if ((CumulativeCycles * 2) >= Size * Executions)
    return raw_ostream::YELLOW;
  return raw_ostream::SAVEDCOLOR;
}

static void tryChangeColor(raw_ostream &OS, unsigned Cycles,
                           unsigned Executions, int BufferSize) {
  if (!OS.has_colors())
    return;

  raw_ostream::Colors Color = chooseColor(Cycles, Executions, BufferSize);
  if (Color == raw_ostream::SAVEDCOLOR) {
    OS.resetColor();
    return;
  }
  OS.changeColor(Color, /* bold */ true, /* BG */ false);
}

void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS,
                                      const WaitTimeEntry &Entry,
                                      unsigned SourceIndex,
                                      unsigned Executions) const {
  bool PrintingTotals = SourceIndex == getSource().size();
  unsigned CumulativeExecutions = PrintingTotals ? Timeline.size() : Executions;

  if (!PrintingTotals)
    OS << SourceIndex << '.';

  OS.PadToColumn(7);

  double AverageTime1, AverageTime2, AverageTime3;
  AverageTime1 =
      (double)(Entry.CyclesSpentInSchedulerQueue * 10) / CumulativeExecutions;
  AverageTime2 =
      (double)(Entry.CyclesSpentInSQWhileReady * 10) / CumulativeExecutions;
  AverageTime3 = (double)(Entry.CyclesSpentAfterWBAndBeforeRetire * 10) /
                 CumulativeExecutions;

  OS << Executions;
  OS.PadToColumn(13);

  int BufferSize = PrintingTotals ? 0 : UsedBuffer[SourceIndex].second;
  if (!PrintingTotals)
    tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, CumulativeExecutions,
                   BufferSize);
  OS << format("%.1f", floor(AverageTime1 + 0.5) / 10);
  OS.PadToColumn(20);
  if (!PrintingTotals)
    tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, CumulativeExecutions,
                   BufferSize);
  OS << format("%.1f", floor(AverageTime2 + 0.5) / 10);
  OS.PadToColumn(27);
  if (!PrintingTotals)
    tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire,
                   CumulativeExecutions,
                   getSubTargetInfo().getSchedModel().MicroOpBufferSize);
  OS << format("%.1f", floor(AverageTime3 + 0.5) / 10);

  if (OS.has_colors())
    OS.resetColor();
  OS.PadToColumn(34);
}

void TimelineView::printAverageWaitTimes(raw_ostream &OS) const {
  std::string Header =
      "\n\nAverage Wait times (based on the timeline view):\n"
      "[0]: Executions\n"
      "[1]: Average time spent waiting in a scheduler's queue\n"
      "[2]: Average time spent waiting in a scheduler's queue while ready\n"
      "[3]: Average time elapsed from WB until retire stage\n\n"
      "      [0]    [1]    [2]    [3]\n";
  OS << Header;
  formatted_raw_ostream FOS(OS);
  unsigned Executions = Timeline.size() / getSource().size();
  unsigned IID = 0;
  for (const MCInst &Inst : getSource()) {
    printWaitTimeEntry(FOS, WaitTime[IID], IID, Executions);
    FOS << "   " << printInstructionString(Inst) << '\n';
    FOS.flush();
    ++IID;
  }

  // If the timeline contains more than one instruction,
  // let's also print global averages.
  if (getSource().size() != 1) {
    WaitTimeEntry TotalWaitTime = std::accumulate(
        WaitTime.begin(), WaitTime.end(), WaitTimeEntry{0, 0, 0},
        [](const WaitTimeEntry &A, const WaitTimeEntry &B) {
          return WaitTimeEntry{
              A.CyclesSpentInSchedulerQueue + B.CyclesSpentInSchedulerQueue,
              A.CyclesSpentInSQWhileReady + B.CyclesSpentInSQWhileReady,
              A.CyclesSpentAfterWBAndBeforeRetire +
                  B.CyclesSpentAfterWBAndBeforeRetire};
        });
    printWaitTimeEntry(FOS, TotalWaitTime, IID, Executions);
    FOS << "   "
        << "<total>" << '\n';
    FOS.flush();
  }
}

void TimelineView::printTimelineViewEntry(formatted_raw_ostream &OS,
                                          const TimelineViewEntry &Entry,
                                          unsigned Iteration,
                                          unsigned SourceIndex) const {
  if (Iteration == 0 && SourceIndex == 0)
    OS << '\n';
  OS << '[' << Iteration << ',' << SourceIndex << ']';
  OS.PadToColumn(10);
  assert(Entry.CycleDispatched >= 0 && "Invalid TimelineViewEntry!");
  unsigned CycleDispatched = static_cast<unsigned>(Entry.CycleDispatched);
  for (unsigned I = 0, E = CycleDispatched; I < E; ++I)
    OS << ((I % 5 == 0) ? '.' : ' ');
  OS << TimelineView::DisplayChar::Dispatched;
  if (CycleDispatched != Entry.CycleExecuted) {
    // Zero latency instructions have the same value for CycleDispatched,
    // CycleIssued and CycleExecuted.
    for (unsigned I = CycleDispatched + 1, E = Entry.CycleIssued; I < E; ++I)
      OS << TimelineView::DisplayChar::Waiting;
    if (Entry.CycleIssued == Entry.CycleExecuted)
      OS << TimelineView::DisplayChar::DisplayChar::Executed;
    else {
      if (CycleDispatched != Entry.CycleIssued)
        OS << TimelineView::DisplayChar::Executing;
      for (unsigned I = Entry.CycleIssued + 1, E = Entry.CycleExecuted; I < E;
           ++I)
        OS << TimelineView::DisplayChar::Executing;
      OS << TimelineView::DisplayChar::Executed;
    }
  }

  for (unsigned I = Entry.CycleExecuted + 1, E = Entry.CycleRetired; I < E; ++I)
    OS << TimelineView::DisplayChar::RetireLag;
  if (Entry.CycleExecuted < Entry.CycleRetired)
    OS << TimelineView::DisplayChar::Retired;

  // Skip other columns.
  for (unsigned I = Entry.CycleRetired + 1, E = LastCycle; I <= E; ++I)
    OS << ((I % 5 == 0 || I == LastCycle) ? '.' : ' ');
}

static void printTimelineHeader(formatted_raw_ostream &OS, unsigned Cycles) {
  OS << "\n\nTimeline view:\n";
  if (Cycles >= 10) {
    OS.PadToColumn(10);
    for (unsigned I = 0; I <= Cycles; ++I) {
      if (((I / 10) & 1) == 0)
        OS << ' ';
      else
        OS << I % 10;
    }
    OS << '\n';
  }

  OS << "Index";
  OS.PadToColumn(10);
  for (unsigned I = 0; I <= Cycles; ++I) {
    if (((I / 10) & 1) == 0)
      OS << I % 10;
    else
      OS << ' ';
  }
  OS << '\n';
}

void TimelineView::printTimeline(raw_ostream &OS) const {
  formatted_raw_ostream FOS(OS);
  printTimelineHeader(FOS, LastCycle);
  FOS.flush();

  unsigned IID = 0;
  ArrayRef<llvm::MCInst> Source = getSource();
  const unsigned Iterations = Timeline.size() / Source.size();
  for (unsigned Iteration = 0; Iteration < Iterations; ++Iteration) {
    for (const MCInst &Inst : Source) {
      const TimelineViewEntry &Entry = Timeline[IID];
      // When an instruction is retired after timeline-max-cycles,
      // its CycleRetired is left at 0. However, it's possible for
      // a 0 latency instruction to be retired during cycle 0 and we
      // don't want to early exit in that case. The CycleExecuted
      // attribute is set correctly whether or not it is greater
      // than timeline-max-cycles so we can use that to ensure
      // we don't early exit because of a 0 latency instruction.
      if (Entry.CycleRetired == 0 && Entry.CycleExecuted != 0)
        return;

      unsigned SourceIndex = IID % Source.size();
      printTimelineViewEntry(FOS, Entry, Iteration, SourceIndex);
      FOS << "   " << printInstructionString(Inst) << '\n';
      FOS.flush();

      ++IID;
    }
  }
}

json::Value TimelineView::toJSON() const {
  json::Array TimelineInfo;

  for (const TimelineViewEntry &TLE : Timeline) {
    TimelineInfo.push_back(
        json::Object({{"CycleDispatched", TLE.CycleDispatched},
                      {"CycleReady", TLE.CycleReady},
                      {"CycleIssued", TLE.CycleIssued},
                      {"CycleExecuted", TLE.CycleExecuted},
                      {"CycleRetired", TLE.CycleRetired}}));
  }
  return json::Object({{"TimelineInfo", std::move(TimelineInfo)}});
}
} // namespace mca
} // namespace llvm