1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-22 12:33:33 +02:00
llvm-mirror/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
Sameer AbuAsal d114bbf2bb [RISCV] Add peepholes for Global Address lowering patterns
Summary:
  Base and offset are always separated when a GlobalAddress node is lowered
  (rL332641) as an optimization to reduce instruction count. However, this
  optimization is not profitable if the Global Address ends up being used in
  only one instruction.

  This patch adds peephole optimizations that merge an offset of
  an address calculation into the LUI %hi and ADDI %lo of the lowering sequence.

  The peephole handles three patterns:

 1) ADDI (ADDI (LUI %hi(global)) %lo(global)), offset
     --->
      ADDI (LUI %hi(global + offset)) %lo(global + offset).

   This generates:
   lui a0, hi (global + offset)
   addi a0, a0, lo (global + offset)

   Instead of

   lui a0, hi (global)
   addi a0, lo (global)
   addi a0, offset

   This pattern is for cases when the offset is small enough to fit in the
   immediate field of ADDI (less than 12 bits).

 2) ADD ((ADDI (LUI %hi(global)) %lo(global)), (LUI hi_offset))
     --->
      offset = hi_offset << 12
      ADDI (LUI %hi(global + offset)) %lo(global + offset)

   Which generates the ASM:

   lui  a0, hi(global + offset)
   addi a0, lo(global + offset)

   Instead of:

   lui  a0, hi(global)
   addi a0, lo(global)
   lui a1, (offset)
   add a0, a0, a1

   This pattern is for cases when the offset doesn't fit in an immediate field
   of ADDI but the lower 12 bits are all zeros.

 3) ADD ((ADDI (LUI %hi(global)) %lo(global)), (ADDI lo_offset, (LUI hi_offset)))
     --->
        offset = offhi20<<12 + offlo12
        ADDI (LUI %hi(global + offset)) %lo(global + offset)

   Which generates the ASM:

   lui  a1, %hi(global + offset)
   addi a1, %lo(global + offset)

   Instead of:

   lui  a0, hi(global)
   addi a0, lo(global)
   lui a1, (offhi20)
   addi a1, (offlo12)
   add a0, a0, a1

   This pattern is for cases when the offset doesn't fit in an immediate field
   of ADDI and both the lower 12 bits and high 20 bits are non zero.

    Reviewers: asb

    Reviewed By: asb

    Subscribers: rbar, johnrusso, simoncook, jordy.potman.lists, apazos,
  niosHD, kito-cheng, shiva0217, zzheng, edward-jones, mgrang

llvm-svn: 333455
2018-05-29 19:34:54 +00:00

469 lines
17 KiB
C++

//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISCV ------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the RISCV target.
//
//===----------------------------------------------------------------------===//
#include "RISCV.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "RISCVTargetMachine.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "riscv-isel"
// RISCV-specific code to select RISCV machine instructions for
// SelectionDAG operations.
namespace {
/// SelectionDAG-based instruction selector for the RISCV target.
///
/// Select() handles the few nodes that need hand-written treatment and hands
/// everything else to the matcher pulled in from RISCVGenDAGISel.inc.
/// PostprocessISelDAG() then runs a set of peephole clean-ups over the DAG.
class RISCVDAGToDAGISel final : public SelectionDAGISel {
  /// Subtarget of the function being selected. It is assigned at the start of
  /// runOnMachineFunction(); it is null before that so that a premature use
  /// dereferences a null pointer rather than an indeterminate one.
  const RISCVSubtarget *Subtarget = nullptr;

public:
  explicit RISCVDAGToDAGISel(RISCVTargetMachine &TargetMachine)
      : SelectionDAGISel(TargetMachine) {}

  StringRef getPassName() const override {
    return "RISCV DAG->DAG Pattern Instruction Selection";
  }

  /// Caches the subtarget for \p MF, then defers to the base class.
  bool runOnMachineFunction(MachineFunction &MF) override {
    Subtarget = &MF.getSubtarget<RISCVSubtarget>();
    return SelectionDAGISel::runOnMachineFunction(MF);
  }

  void PostprocessISelDAG() override;

  void Select(SDNode *Node) override;

  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  bool SelectAddrFI(SDValue Addr, SDValue &Base);

// Include the pieces autogenerated from the target description.
#include "RISCVGenDAGISel.inc"

private:
  void doPeepholeLoadStoreADDI();
  void doPeepholeGlobalAddiLuiOffset();
  void doPeepholeBuildPairF64SplitF64();
};
} // end anonymous namespace
// Runs the three RISCV peephole passes over the DAG, in this fixed order:
// load/store ADDI folding, global-address offset folding, and removal of
// redundant BuildPairF64/SplitF64 pairs.
void RISCVDAGToDAGISel::PostprocessISelDAG() {
doPeepholeLoadStoreADDI();
doPeepholeGlobalAddiLuiOffset();
doPeepholeBuildPairF64SplitF64();
}
// Selects a machine instruction for Node. Nodes that already carry a machine
// opcode are only marked as done; XLen-typed zero constants and frame indices
// are selected by hand; every other node goes to the generated matcher.
void RISCVDAGToDAGISel::Select(SDNode *Node) {
  // If we have a custom node, we have already selected.
  if (Node->isMachineOpcode()) {
    LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
    Node->setNodeId(-1);
    return;
  }

  const MVT XLenVT = Subtarget->getXLenVT();
  const EVT ResultVT = Node->getValueType(0);

  // Instruction selection not handled by the auto-generated tablegen
  // selection should be handled here.
  switch (Node->getOpcode()) {
  case ISD::Constant: {
    if (ResultVT != XLenVT)
      break;
    // Materialize zero constants as copies from X0. This allows the coalescer
    // to propagate these into other instructions.
    if (cast<ConstantSDNode>(Node)->isNullValue()) {
      SDValue ZeroCopy = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                                SDLoc(Node), RISCV::X0, XLenVT);
      ReplaceNode(Node, ZeroCopy.getNode());
      return;
    }
    break;
  }
  case ISD::FrameIndex: {
    // A frame index becomes "ADDI <target frame index>, 0".
    SDLoc DL(Node);
    SDValue ZeroImm = CurDAG->getTargetConstant(0, DL, XLenVT);
    const int FrameIdx = cast<FrameIndexSDNode>(Node)->getIndex();
    SDValue TargetFI = CurDAG->getTargetFrameIndex(FrameIdx, ResultVT);
    ReplaceNode(Node, CurDAG->getMachineNode(RISCV::ADDI, DL, ResultVT,
                                             TargetFI, ZeroImm));
    return;
  }
  default:
    break;
  }

  // Select the default instruction.
  SelectCode(Node);
}
// Selects the address operand(s) for an inline-asm memory constraint. For the
// 'i' and 'm' constraints Op is appended to OutOps unchanged and false is
// returned; any other constraint returns true without touching OutOps.
bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
  const bool IsSimpleOperand = ConstraintID == InlineAsm::Constraint_i ||
                               ConstraintID == InlineAsm::Constraint_m;
  if (!IsSimpleOperand)
    return true;

  // We just support simple memory operands that have a single address
  // operand and need no special handling.
  OutOps.push_back(Op);
  return false;
}
// Matches an address that is a plain frame index. On a match, Base receives
// the corresponding XLen-typed target frame index and true is returned;
// otherwise Base is left untouched and false is returned.
bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) {
  auto *FrameIdx = dyn_cast<FrameIndexSDNode>(Addr);
  if (!FrameIdx)
    return false;

  Base = CurDAG->getTargetFrameIndex(FrameIdx->getIndex(),
                                     Subtarget->getXLenVT());
  return true;
}
// Detect the pattern lui %hi(global) --> ADDI %lo(global)
//                    HiLUI               LoADDI
// among the operands of Tail.
//
// Operands are scanned in order. Operands that are not a single-use machine
// ADDI with a global-address immediate are skipped. The first operand that is
// such an ADDI decides the result: the function returns true only if its
// global address carries the %lo flag with a zero offset and its first operand
// is a single-use LUI of a global address carrying the %hi flag with a zero
// offset. On success Idx is the index of the matching operand and LoADDI,
// HiLUI, GAlo and GAhi describe the matched sequence. The output arguments are
// only meaningful when true is returned.
static bool detectLuiAddiGlobal(SDNode *Tail, unsigned &Idx, SDValue &LoADDI,
                                SDValue &HiLUI, GlobalAddressSDNode *&GAlo,
                                GlobalAddressSDNode *&GAhi) {
  for (Idx = 0; Idx < Tail->getNumOperands(); ++Idx) {
    LoADDI = Tail->getOperand(Idx);

    // Keep looking unless this operand is a machine ADDI ...
    if (!LoADDI->isMachineOpcode() ||
        LoADDI->getMachineOpcode() != RISCV::ADDI)
      continue;
    // ... whose immediate is a global address and whose only user is Tail.
    auto *LoGlobal = dyn_cast<GlobalAddressSDNode>(LoADDI->getOperand(1));
    if (!LoGlobal || !LoADDI->hasOneUse())
      continue;

    // From here on the candidate either matches completely or the whole
    // detection fails; no further operands are examined.

    // Check for existence of %lo target flag and a zero offset.
    GAlo = LoGlobal;
    if (GAlo->getTargetFlags() != RISCVII::MO_LO || GAlo->getOffset() != 0)
      return false;

    // The ADDI must be fed by a single-use LUI of a global address.
    HiLUI = LoADDI->getOperand(0);
    if (!HiLUI->isMachineOpcode() || HiLUI->getMachineOpcode() != RISCV::LUI)
      return false;
    auto *HiGlobal = dyn_cast<GlobalAddressSDNode>(HiLUI->getOperand(0));
    if (!HiGlobal || !HiLUI->hasOneUse())
      return false;

    // Check for existence of %hi target flag and a zero offset.
    GAhi = HiGlobal;
    return GAhi->getTargetFlags() == RISCVII::MO_HI && GAhi->getOffset() == 0;
  }
  return false;
}
// Match "LUI hi_offset" and compute the constant it materializes.
//
// Returns false, leaving Offset untouched, unless OffsetLUI is a machine LUI
// whose operand is a constant. On success Offset is set to the value the LUI
// produces: the immediate shifted left by 12 and sign-extended from bit 31.
// The sign extension matters on RV64, where LUI sign-extends its 32-bit
// result; without it an immediate with bit 19 set would be folded as a large
// positive offset instead of the negative value the instruction produces.
static bool matchLuiOffset(SDValue &OffsetLUI, int64_t &Offset) {
  if (!OffsetLUI->isMachineOpcode() ||
      OffsetLUI->getMachineOpcode() != RISCV::LUI)
    return false;
  auto *HiImm = dyn_cast<ConstantSDNode>(OffsetLUI->getOperand(0));
  if (!HiImm)
    return false;

  // Shift in the unsigned domain: left-shifting a negative signed value is
  // undefined behaviour before C++20.
  const uint64_t Shifted = static_cast<uint64_t>(HiImm->getSExtValue()) << 12;
  Offset = SignExtend64<32>(Shifted);
  LLVM_DEBUG(dbgs() << " Detected \" LUI Offset_hi\"\n");
  return true;
}
// Match "ADDI (LUI hi_offset), lo_offset" and compute the constant the pair
// materializes.
//
// Returns false, leaving Offset untouched, unless OffsetLoADDI is a single-use
// machine ADDI with a constant immediate whose first operand is a single-use
// machine LUI with a constant immediate. On success Offset is set to
//   sext32(hi_offset << 12) + sext(lo_offset)
// which mirrors what the two instructions compute: LUI's 32-bit result is
// sign-extended on RV64, and the ADDI immediate is a signed quantity, so it is
// read with getSExtValue rather than getZExtValue.
// NOTE(review): on RV32 the sum can fall outside 32 bits; presumably the
// global-address node truncates the offset to pointer width -- confirm.
static bool matchAddiLuiOffset(SDValue &OffsetLoADDI, int64_t &Offset) {
  // LoADDI should only be used by the tail instruction.
  if (!OffsetLoADDI->isMachineOpcode() ||
      OffsetLoADDI->getMachineOpcode() != RISCV::ADDI)
    return false;
  auto *LoImm = dyn_cast<ConstantSDNode>(OffsetLoADDI->getOperand(1));
  if (!LoImm || !OffsetLoADDI->hasOneUse())
    return false;

  // HiLUI should only be used by the LoADDI.
  SDValue OffsetHiLUI = OffsetLoADDI->getOperand(0);
  if (!OffsetHiLUI->isMachineOpcode() ||
      OffsetHiLUI->getMachineOpcode() != RISCV::LUI)
    return false;
  auto *HiImm = dyn_cast<ConstantSDNode>(OffsetHiLUI->getOperand(0));
  if (!HiImm || !OffsetHiLUI->hasOneUse())
    return false;

  const int64_t OffLo = LoImm->getSExtValue();
  // Shift in the unsigned domain: left-shifting a negative signed value is
  // undefined behaviour before C++20.
  const int64_t OffHi =
      SignExtend64<32>(static_cast<uint64_t>(HiImm->getSExtValue()) << 12);
  Offset = OffHi + OffLo;
  LLVM_DEBUG(dbgs() << " Detected \" ADDI (LUI Offset_hi), Offset_lo\"\n");
  return true;
}
// Fold Offset into a matched "LUI %hi(global) / ADDI %lo(global)" sequence and
// redirect the users of Tail to it.
//
// Tail         - the ADD/ADDI node whose result is being replaced.
// CurDAG       - the DAG that owns all nodes involved.
// GAhi, GAlo   - the global-address operands currently used by the LUI and the
//                ADDI; only their global value is reused.
// GlobalHiLUI  - the LUI node of the sequence; its operand is rewritten.
// GlobalLoADDI - the ADDI node of the sequence; its immediate is rewritten and
//                it becomes the replacement for Tail's result 0.
// Offset       - the offset to store in the new global-address operands.
static void updateTailInstrUsers(SDNode *Tail, SelectionDAG *CurDAG,
GlobalAddressSDNode *GAhi,
GlobalAddressSDNode *GAlo,
SDValue &GlobalHiLUI, SDValue &GlobalLoADDI,
int64_t Offset) {
// Update the offset in GAhi and GAlo.
// The location for the new operands is taken from Tail's operand 1.
SDLoc DL(Tail->getOperand(1));
// Fresh %hi / %lo target global addresses for the same global, now carrying
// Offset.
SDValue GAHiNew = CurDAG->getTargetGlobalAddress(GAhi->getGlobal(), DL,
GlobalHiLUI.getValueType(),
Offset, RISCVII::MO_HI);
SDValue GALoNew = CurDAG->getTargetGlobalAddress(GAlo->getGlobal(), DL,
GlobalLoADDI.getValueType(),
Offset, RISCVII::MO_LO);
// Install the new operands: the LUI takes the %hi address, the ADDI keeps the
// LUI as its first operand and takes the %lo address as its immediate.
CurDAG->UpdateNodeOperands(GlobalHiLUI.getNode(), GAHiNew);
CurDAG->UpdateNodeOperands(GlobalLoADDI.getNode(), GlobalHiLUI, GALoNew);
// Update all uses of the Tail with the GlobalLoADDI. After
// this Tail will be a dead node.
SDValue From = SDValue(Tail, 0);
CurDAG->ReplaceAllUsesOfValuesWith(&From, &GlobalLoADDI, 1);
}
// TODO: This transformation might be better implemented in a Machine Function
// Pass as discussed here: https://reviews.llvm.org/D45748.
//
// Merge the offset of address calculation into the offset field
// of a global address node in a global address lowering sequence ("LUI
// %hi(global) --> add %lo(global)") under the following conditions: 1) The
// offset field in the global address lowering sequence is zero. 2) The lowered
// global address is only used in one node, referred to as "Tail".
// This peephole does the following transformations to merge the offset:
// 1) ADDI (ADDI (LUI %hi(global)) %lo(global)), offset
// --->
// ADDI (LUI %hi(global + offset)) %lo(global + offset).
//
// This generates:
// lui a0, hi (global + offset)
// addi a0, a0, lo (global + offset)
// Instead of
// lui a0, hi (global)
// addi a0, lo (global)
// addi a0, offset
// This pattern is for cases when the offset is small enough to fit in the
// immediate field of ADDI (less than 12 bits).
// 2) ADD ((ADDI (LUI %hi(global)) %lo(global)), (LUI hi_offset))
// --->
// offset = hi_offset << 12
// ADDI (LUI %hi(global + offset)) %lo(global + offset)
// Which generates the ASM:
// lui a0, hi(global + offset)
// addi a0, lo(global + offset)
// Instead of:
// lui a0, hi(global)
// addi a0, lo(global)
// lui a1, (offset)
// add a0, a0, a1
// This pattern is for cases when the offset doesn't fit in an immediate field
// of ADDI but the lower 12 bits are all zeros.
// 3) ADD ((ADDI (LUI %hi(global)) %lo(global)), (ADDI lo_offset, (LUI
// hi_offset)))
// --->
// ADDI (LUI %hi(global + offset)) %lo(global + offset)
// Which generates the ASM:
// lui a1, %hi(global + offhi20<<12 + offlo12)
// addi a1, %lo(global + offhi20<<12 + offlo12)
// Instead of:
// lui a0, hi(global)
// addi a0, lo(global)
// lui a1, (offhi20)
// addi a1, (offlo12)
// add a0, a0, a1
// This pattern is for cases when the offset doesn't fit in an immediate field
// of ADDI and both the lower 12 bits and high 20 bits are non zero.
void RISCVDAGToDAGISel::doPeepholeGlobalAddiLuiOffset() {
SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
++Position;
SelectionDAG::allnodes_iterator Begin(CurDAG->allnodes_begin());
while (Position != Begin) {
SDNode *Tail = &*--Position;
// Skip dead nodes and any non-machine opcodes.
if (Tail->use_empty() || !Tail->isMachineOpcode())
continue;
// The tail instruction can be an ADD or an ADDI.
if (!Tail->isMachineOpcode() || !(Tail->getMachineOpcode() == RISCV::ADD ||
Tail->getMachineOpcode() == RISCV::ADDI))
continue;
// First detect the global address part of pattern:
// (lui %hi(global) --> Addi %lo(global))
unsigned GlobalLoADDiIdx;
SDValue GlobalLoADDI;
SDValue GlobalHiLUI;
GlobalAddressSDNode *GAhi;
GlobalAddressSDNode *GAlo;
if (!detectLuiAddiGlobal(Tail, GlobalLoADDiIdx, GlobalLoADDI, GlobalHiLUI,
GAlo, GAhi))
continue;
LLVM_DEBUG(dbgs() << " Detected \"ADDI LUI %hi(global), %lo(global)\n");
// Detect the offset part for the address calculation by looking at the
// other operand of the tail instruction:
int64_t Offset;
if (Tail->getMachineOpcode() == RISCV::ADD) {
// If the Tail is an ADD instruction, the offset can be in two forms:
// 1) LUI hi_Offset followed by:
// ADDI lo_offset
// This happens in case the offset has non zero bits in
// both hi 20 and lo 12 bits.
// 2) LUI (offset20)
// This happens in case the lower 12 bits of the offset are zeros.
SDValue OffsetVal = Tail->getOperand(1 - GlobalLoADDiIdx);
if (!matchAddiLuiOffset(OffsetVal, Offset) &&
!matchLuiOffset(OffsetVal, Offset))
continue;
} else
// The Tail is an ADDI instruction:
Offset = cast<ConstantSDNode>(Tail->getOperand(1 - GlobalLoADDiIdx))
->getSExtValue();
LLVM_DEBUG(
dbgs()
<< " Fold offset value into global offset of LUI %hi and ADDI %lo\n");
LLVM_DEBUG(dbgs() << "\tTail:");
LLVM_DEBUG(Tail->dump(CurDAG));
LLVM_DEBUG(dbgs() << "\tGlobalHiLUI:");
LLVM_DEBUG(GlobalHiLUI->dump(CurDAG));
LLVM_DEBUG(dbgs() << "\tGlobalLoADDI:");
LLVM_DEBUG(GlobalLoADDI->dump(CurDAG));
LLVM_DEBUG(dbgs() << "\n");
updateTailInstrUsers(Tail, CurDAG, GAhi, GAlo, GlobalHiLUI, GlobalLoADDI,
Offset);
}
CurDAG->RemoveDeadNodes();
}
// Merge an ADDI into the offset of a load/store instruction where possible.
// (load (add base, off), 0) -> (load base, off)
// (store val, (add base, off)) -> (store val, base, off)
void RISCVDAGToDAGISel::doPeepholeLoadStoreADDI() {
SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
++Position;
while (Position != CurDAG->allnodes_begin()) {
SDNode *N = &*--Position;
// Skip dead nodes and any non-machine opcodes.
if (N->use_empty() || !N->isMachineOpcode())
continue;
int OffsetOpIdx;
int BaseOpIdx;
// Only attempt this optimisation for I-type loads and S-type stores.
switch (N->getMachineOpcode()) {
default:
continue;
case RISCV::LB:
case RISCV::LH:
case RISCV::LW:
case RISCV::LBU:
case RISCV::LHU:
case RISCV::LWU:
case RISCV::LD:
case RISCV::FLW:
case RISCV::FLD:
BaseOpIdx = 0;
OffsetOpIdx = 1;
break;
case RISCV::SB:
case RISCV::SH:
case RISCV::SW:
case RISCV::SD:
case RISCV::FSW:
case RISCV::FSD:
BaseOpIdx = 1;
OffsetOpIdx = 2;
break;
}
// Currently, the load/store offset must be 0 to be considered for this
// peephole optimisation.
if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)) ||
N->getConstantOperandVal(OffsetOpIdx) != 0)
continue;
SDValue Base = N->getOperand(BaseOpIdx);
// If the base is an ADDI, we can merge it in to the load/store.
if (!Base.isMachineOpcode() || Base.getMachineOpcode() != RISCV::ADDI)
continue;
SDValue ImmOperand = Base.getOperand(1);
if (auto Const = dyn_cast<ConstantSDNode>(ImmOperand)) {
ImmOperand = CurDAG->getTargetConstant(
Const->getSExtValue(), SDLoc(ImmOperand), ImmOperand.getValueType());
} else if (auto GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) {
ImmOperand = CurDAG->getTargetGlobalAddress(
GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(),
GA->getOffset(), GA->getTargetFlags());
} else {
continue;
}
LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
LLVM_DEBUG(Base->dump(CurDAG));
LLVM_DEBUG(dbgs() << "\nN: ");
LLVM_DEBUG(N->dump(CurDAG));
LLVM_DEBUG(dbgs() << "\n");
// Modify the offset operand of the load/store.
if (BaseOpIdx == 0) // Load
CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand,
N->getOperand(2));
else // Store
CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0),
ImmOperand, N->getOperand(3));
// The add-immediate may now be dead, in which case remove it.
if (Base.getNode()->use_empty())
CurDAG->RemoveDeadNode(Base.getNode());
}
}
// Remove redundant BuildPairF64+SplitF64 pairs. i.e. cases where an f64 is
// built of two i32 values, only to be split apart again. This must be done
// here as a peephole optimisation as the DAG has not been fully legalized at
// the point BuildPairF64/SplitF64 nodes are created in RISCVISelLowering, so
// some nodes would not yet have been replaced with libcalls.
void RISCVDAGToDAGISel::doPeepholeBuildPairF64SplitF64() {
SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
++Position;
while (Position != CurDAG->allnodes_begin()) {
SDNode *N = &*--Position;
// Skip dead nodes and any nodes other than SplitF64Pseudo.
if (N->use_empty() || !N->isMachineOpcode() ||
!(N->getMachineOpcode() == RISCV::SplitF64Pseudo))
continue;
// If the operand to SplitF64 is a BuildPairF64, the split operation is
// redundant. Just use the operands to BuildPairF64 as the result.
SDValue F64Val = N->getOperand(0);
if (F64Val.isMachineOpcode() &&
F64Val.getMachineOpcode() == RISCV::BuildPairF64Pseudo) {
LLVM_DEBUG(
dbgs() << "Removing redundant SplitF64Pseudo and replacing uses "
"with BuildPairF64Pseudo operands:\n");
LLVM_DEBUG(dbgs() << "N: ");
LLVM_DEBUG(N->dump(CurDAG));
LLVM_DEBUG(dbgs() << "F64Val: ");
LLVM_DEBUG(F64Val->dump(CurDAG));
LLVM_DEBUG(dbgs() << "\n");
SDValue From[] = {SDValue(N, 0), SDValue(N, 1)};
SDValue To[] = {F64Val.getOperand(0), F64Val.getOperand(1)};
CurDAG->ReplaceAllUsesOfValuesWith(From, To, 2);
}
}
CurDAG->RemoveDeadNodes();
}
// This pass converts a legalized DAG into a RISCV-specific DAG, ready
// for instruction scheduling.
//
// Returns a newly allocated RISCVDAGToDAGISel constructed from TM.
// NOTE(review): presumably the caller takes ownership of the returned pass --
// confirm against the call site.
FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM) {
return new RISCVDAGToDAGISel(TM);
}