Mirror of https://github.com/RPCS3/llvm-mirror.git (synced 2024-11-23 11:13:28 +01:00)
[RISCV] Peephole optimisation for load/store of global values or constant addresses
(load (add base, off), 0) -> (load base, off)
(store val, (add base, off)) -> (store val, base, off)

This is similar to an equivalent peephole optimisation in PPCISelDAGToDAG.

llvm-svn: 327831
This commit is contained in: parent 9b92317c0b, commit 67316f8242
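As a rough illustration of the rule above (a stand-alone sketch, not code from this commit; the types and the foldAddImmediate helper below are hypothetical), the rewrite only fires when the memory access itself carries a zero immediate offset and its base register comes straight from an add-immediate:

// Hypothetical sketch of the folding rule, outside of SelectionDAG: a
// load/store with a zero immediate offset whose base register was produced
// by an add-immediate can absorb that immediate directly.
#include <optional>

struct MemAccess {
  int BaseReg;  // register supplying the address
  long Offset;  // immediate folded into the load/store itself
};

struct AddImm {
  int DestReg;  // register written by the add-immediate
  int SrcReg;   // its base register
  long Imm;     // the immediate it adds
};

// Returns the rewritten access if the pattern matches, otherwise nothing.
std::optional<MemAccess> foldAddImmediate(const MemAccess &MA, const AddImm &AI) {
  if (MA.Offset != 0 || MA.BaseReg != AI.DestReg)
    return std::nullopt;               // only a zero offset can absorb the add
  return MemAccess{AI.SrcReg, AI.Imm}; // (load base, imm) / (store val, base, imm)
}

In the actual pass the same check runs over machine nodes after instruction selection, and the immediate may also be a %lo() global-address operand rather than a plain constant, which is what lets lui/addi/lw sequences for globals collapse to lui/lw in the tests below.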
@@ -42,6 +42,8 @@ public:
    return SelectionDAGISel::runOnMachineFunction(MF);
  }

  void PostprocessISelDAG() override;

  void Select(SDNode *Node) override;

  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
@@ -51,9 +53,14 @@ public:

// Include the pieces autogenerated from the target description.
#include "RISCVGenDAGISel.inc"

private:
  void doPeepholeLoadStoreADDI();
};
}

void RISCVDAGToDAGISel::PostprocessISelDAG() { doPeepholeLoadStoreADDI(); }

void RISCVDAGToDAGISel::Select(SDNode *Node) {
  unsigned Opcode = Node->getOpcode();
  MVT XLenVT = Subtarget->getXLenVT();
@@ -117,6 +124,94 @@ bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) {
  return false;
}

// Merge an ADDI into the offset of a load/store instruction where possible.
// (load (add base, off), 0) -> (load base, off)
// (store val, (add base, off)) -> (store val, base, off)
void RISCVDAGToDAGISel::doPeepholeLoadStoreADDI() {
  SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
  ++Position;

  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes and any non-machine opcodes.
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    int OffsetOpIdx;
    int BaseOpIdx;

    // Only attempt this optimisation for I-type loads and S-type stores.
    switch (N->getMachineOpcode()) {
    default:
      continue;
    case RISCV::LB:
    case RISCV::LH:
    case RISCV::LW:
    case RISCV::LBU:
    case RISCV::LHU:
    case RISCV::LWU:
    case RISCV::LD:
    case RISCV::FLW:
    case RISCV::FLD:
      BaseOpIdx = 0;
      OffsetOpIdx = 1;
      break;
    case RISCV::SB:
    case RISCV::SH:
    case RISCV::SW:
    case RISCV::SD:
    case RISCV::FSW:
    case RISCV::FSD:
      BaseOpIdx = 1;
      OffsetOpIdx = 2;
      break;
    }

    // Currently, the load/store offset must be 0 to be considered for this
    // peephole optimisation.
    if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)) ||
        N->getConstantOperandVal(OffsetOpIdx) != 0)
      continue;

    SDValue Base = N->getOperand(BaseOpIdx);

    // If the base is an ADDI, we can merge it in to the load/store.
    if (!Base.isMachineOpcode() || Base.getMachineOpcode() != RISCV::ADDI)
      continue;

    SDValue ImmOperand = Base.getOperand(1);

    if (auto Const = dyn_cast<ConstantSDNode>(ImmOperand)) {
      ImmOperand = CurDAG->getTargetConstant(
          Const->getSExtValue(), SDLoc(ImmOperand), ImmOperand.getValueType());
    } else if (auto GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) {
      ImmOperand = CurDAG->getTargetGlobalAddress(
          GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(),
          GA->getOffset(), GA->getTargetFlags());
    } else {
      continue;
    }

    DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
    DEBUG(Base->dump(CurDAG));
    DEBUG(dbgs() << "\nN: ");
    DEBUG(N->dump(CurDAG));
    DEBUG(dbgs() << "\n");

    // Modify the offset operand of the load/store.
    if (BaseOpIdx == 0) // Load
      CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand,
                                 N->getOperand(2));
    else // Store
      CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0),
                                 ImmOperand, N->getOperand(3));

    // The add-immediate may now be dead, in which case remove it.
    if (Base.getNode()->use_empty())
      CurDAG->RemoveDeadNode(Base.getNode());
  }
}

// This pass converts a legalized DAG into a RISCV-specific DAG, ready
// for instruction scheduling.
FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM) {
@@ -9,12 +9,11 @@ define void @test_blockaddress() nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp)
; RV32I-NEXT: lui a0, %hi(addr)
; RV32I-NEXT: addi a0, a0, %lo(addr)
; RV32I-NEXT: lui a1, %hi(.Ltmp0)
; RV32I-NEXT: addi a1, a1, %lo(.Ltmp0)
; RV32I-NEXT: sw a1, 0(a0)
; RV32I-NEXT: lw a0, 0(a0)
; RV32I-NEXT: lui a0, %hi(.Ltmp0)
; RV32I-NEXT: addi a0, a0, %lo(.Ltmp0)
; RV32I-NEXT: lui a1, %hi(addr)
; RV32I-NEXT: sw a0, %lo(addr)(a1)
; RV32I-NEXT: lw a0, %lo(addr)(a1)
; RV32I-NEXT: jr a0
; RV32I-NEXT: .Ltmp0: # Block address taken
; RV32I-NEXT: .LBB0_1: # %block
@@ -23,20 +23,16 @@ define void @caller() nounwind {
; RV32I-NEXT: addi sp, sp, -32
; RV32I-NEXT: sw ra, 28(sp)
; RV32I-NEXT: lui a0, %hi(foo+12)
; RV32I-NEXT: addi a0, a0, %lo(foo+12)
; RV32I-NEXT: lw a0, 0(a0)
; RV32I-NEXT: lw a0, %lo(foo+12)(a0)
; RV32I-NEXT: sw a0, 24(sp)
; RV32I-NEXT: lui a0, %hi(foo+8)
; RV32I-NEXT: addi a0, a0, %lo(foo+8)
; RV32I-NEXT: lw a0, 0(a0)
; RV32I-NEXT: lw a0, %lo(foo+8)(a0)
; RV32I-NEXT: sw a0, 20(sp)
; RV32I-NEXT: lui a0, %hi(foo+4)
; RV32I-NEXT: addi a0, a0, %lo(foo+4)
; RV32I-NEXT: lw a0, 0(a0)
; RV32I-NEXT: lw a0, %lo(foo+4)(a0)
; RV32I-NEXT: sw a0, 16(sp)
; RV32I-NEXT: lui a0, %hi(foo)
; RV32I-NEXT: addi a0, a0, %lo(foo)
; RV32I-NEXT: lw a0, 0(a0)
; RV32I-NEXT: lw a0, %lo(foo)(a0)
; RV32I-NEXT: sw a0, 12(sp)
; RV32I-NEXT: lui a0, %hi(callee)
; RV32I-NEXT: addi a1, a0, %lo(callee)
@@ -14,36 +14,28 @@ define i32 @test_load_and_cmp() nounwind {
; RV32I-NEXT: addi sp, sp, -48
; RV32I-NEXT: sw ra, 44(sp)
; RV32I-NEXT: lui a0, %hi(y+12)
; RV32I-NEXT: addi a0, a0, %lo(y+12)
; RV32I-NEXT: lw a0, 0(a0)
; RV32I-NEXT: lw a0, %lo(y+12)(a0)
; RV32I-NEXT: sw a0, 20(sp)
; RV32I-NEXT: lui a0, %hi(y+8)
; RV32I-NEXT: addi a0, a0, %lo(y+8)
; RV32I-NEXT: lw a0, 0(a0)
; RV32I-NEXT: lw a0, %lo(y+8)(a0)
; RV32I-NEXT: sw a0, 16(sp)
; RV32I-NEXT: lui a0, %hi(y+4)
; RV32I-NEXT: addi a0, a0, %lo(y+4)
; RV32I-NEXT: lw a0, 0(a0)
; RV32I-NEXT: lw a0, %lo(y+4)(a0)
; RV32I-NEXT: sw a0, 12(sp)
; RV32I-NEXT: lui a0, %hi(y)
; RV32I-NEXT: addi a0, a0, %lo(y)
; RV32I-NEXT: lw a0, 0(a0)
; RV32I-NEXT: lw a0, %lo(y)(a0)
; RV32I-NEXT: sw a0, 8(sp)
; RV32I-NEXT: lui a0, %hi(x+12)
; RV32I-NEXT: addi a0, a0, %lo(x+12)
; RV32I-NEXT: lw a0, 0(a0)
; RV32I-NEXT: lw a0, %lo(x+12)(a0)
; RV32I-NEXT: sw a0, 36(sp)
; RV32I-NEXT: lui a0, %hi(x+8)
; RV32I-NEXT: addi a0, a0, %lo(x+8)
; RV32I-NEXT: lw a0, 0(a0)
; RV32I-NEXT: lw a0, %lo(x+8)(a0)
; RV32I-NEXT: sw a0, 32(sp)
; RV32I-NEXT: lui a0, %hi(x+4)
; RV32I-NEXT: addi a0, a0, %lo(x+4)
; RV32I-NEXT: lw a0, 0(a0)
; RV32I-NEXT: lw a0, %lo(x+4)(a0)
; RV32I-NEXT: sw a0, 28(sp)
; RV32I-NEXT: lui a0, %hi(x)
; RV32I-NEXT: addi a0, a0, %lo(x)
; RV32I-NEXT: lw a0, 0(a0)
; RV32I-NEXT: lw a0, %lo(x)(a0)
; RV32I-NEXT: sw a0, 24(sp)
; RV32I-NEXT: lui a0, %hi(__netf2)
; RV32I-NEXT: addi a2, a0, %lo(__netf2)
@@ -68,36 +60,28 @@ define i32 @test_add_and_fptosi() nounwind {
; RV32I-NEXT: addi sp, sp, -80
; RV32I-NEXT: sw ra, 76(sp)
; RV32I-NEXT: lui a0, %hi(y+12)
; RV32I-NEXT: addi a0, a0, %lo(y+12)
; RV32I-NEXT: lw a0, 0(a0)
; RV32I-NEXT: lw a0, %lo(y+12)(a0)
; RV32I-NEXT: sw a0, 36(sp)
; RV32I-NEXT: lui a0, %hi(y+8)
; RV32I-NEXT: addi a0, a0, %lo(y+8)
; RV32I-NEXT: lw a0, 0(a0)
; RV32I-NEXT: lw a0, %lo(y+8)(a0)
; RV32I-NEXT: sw a0, 32(sp)
; RV32I-NEXT: lui a0, %hi(y+4)
; RV32I-NEXT: addi a0, a0, %lo(y+4)
; RV32I-NEXT: lw a0, 0(a0)
; RV32I-NEXT: lw a0, %lo(y+4)(a0)
; RV32I-NEXT: sw a0, 28(sp)
; RV32I-NEXT: lui a0, %hi(y)
; RV32I-NEXT: addi a0, a0, %lo(y)
; RV32I-NEXT: lw a0, 0(a0)
; RV32I-NEXT: lw a0, %lo(y)(a0)
; RV32I-NEXT: sw a0, 24(sp)
; RV32I-NEXT: lui a0, %hi(x+12)
; RV32I-NEXT: addi a0, a0, %lo(x+12)
; RV32I-NEXT: lw a0, 0(a0)
; RV32I-NEXT: lw a0, %lo(x+12)(a0)
; RV32I-NEXT: sw a0, 52(sp)
; RV32I-NEXT: lui a0, %hi(x+8)
; RV32I-NEXT: addi a0, a0, %lo(x+8)
; RV32I-NEXT: lw a0, 0(a0)
; RV32I-NEXT: lw a0, %lo(x+8)(a0)
; RV32I-NEXT: sw a0, 48(sp)
; RV32I-NEXT: lui a0, %hi(x+4)
; RV32I-NEXT: addi a0, a0, %lo(x+4)
; RV32I-NEXT: lw a0, 0(a0)
; RV32I-NEXT: lw a0, %lo(x+4)(a0)
; RV32I-NEXT: sw a0, 44(sp)
; RV32I-NEXT: lui a0, %hi(x)
; RV32I-NEXT: addi a0, a0, %lo(x)
; RV32I-NEXT: lw a0, 0(a0)
; RV32I-NEXT: lw a0, %lo(x)(a0)
; RV32I-NEXT: sw a0, 40(sp)
; RV32I-NEXT: lui a0, %hi(__addtf3)
; RV32I-NEXT: addi a3, a0, %lo(__addtf3)
@@ -8,8 +8,7 @@ define i32 @constraint_r(i32 %a) {
; RV32I-LABEL: constraint_r:
; RV32I: # %bb.0:
; RV32I-NEXT: lui a1, %hi(gi)
; RV32I-NEXT: addi a1, a1, %lo(gi)
; RV32I-NEXT: lw a1, 0(a1)
; RV32I-NEXT: lw a1, %lo(gi)(a1)
; RV32I-NEXT: #APP
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: #NO_APP
@@ -163,17 +163,14 @@ define i16 @load_sext_zext_anyext_i1_i16(i1 *%a) nounwind {
@G = global i32 0

define i32 @lw_sw_global(i32 %a) nounwind {
; TODO: the addi should be folded in to the lw/sw operations
; RV32I-LABEL: lw_sw_global:
; RV32I: # %bb.0:
; RV32I-NEXT: lui a1, %hi(G)
; RV32I-NEXT: addi a2, a1, %lo(G)
; RV32I-NEXT: lw a1, 0(a2)
; RV32I-NEXT: sw a0, 0(a2)
; RV32I-NEXT: lui a2, %hi(G)
; RV32I-NEXT: lw a1, %lo(G)(a2)
; RV32I-NEXT: sw a0, %lo(G)(a2)
; RV32I-NEXT: lui a2, %hi(G+36)
; RV32I-NEXT: addi a2, a2, %lo(G+36)
; RV32I-NEXT: lw a3, 0(a2)
; RV32I-NEXT: sw a0, 0(a2)
; RV32I-NEXT: lw a3, %lo(G+36)(a2)
; RV32I-NEXT: sw a0, %lo(G+36)(a2)
; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: ret
  %1 = load volatile i32, i32* @G
@@ -186,13 +183,11 @@ define i32 @lw_sw_global(i32 %a) nounwind {

; Ensure that 1 is added to the high 20 bits if bit 11 of the low part is 1
define i32 @lw_sw_constant(i32 %a) nounwind {
; TODO: the addi should be folded in to the lw/sw
; RV32I-LABEL: lw_sw_constant:
; RV32I: # %bb.0:
; RV32I-NEXT: lui a1, 912092
; RV32I-NEXT: addi a2, a1, -273
; RV32I-NEXT: lw a1, 0(a2)
; RV32I-NEXT: sw a0, 0(a2)
; RV32I-NEXT: lui a2, 912092
; RV32I-NEXT: lw a1, -273(a2)
; RV32I-NEXT: sw a0, -273(a2)
; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: ret
  %1 = inttoptr i32 3735928559 to i32*
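The "Ensure that 1 is added to the high 20 bits" comment above refers to how a 32-bit address is split into a lui value and a sign-extended 12-bit offset. A minimal worked sketch (illustrative only, not part of the commit), using the 3735928559 (0xDEADBEEF) constant from this test:

// Illustrative sketch, not LLVM code: split an address into the lui/offset
// pair. Because the low 12 bits are sign-extended by addi/lw/sw, 0x800 is
// added before taking the upper 20 bits, so (Hi20 << 12) + Lo12 == Addr.
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t Addr = 3735928559u;                 // 0xDEADBEEF, as in the test above
  int32_t Lo12 = (int32_t)(Addr << 20) >> 20;  // sign-extended low 12 bits: -273
  uint32_t Hi20 = (Addr + 0x800) >> 12;        // upper 20 bits plus carry: 912092
  printf("lui %u, offset %d\n", (unsigned)Hi20, (int)Lo12);
  return 0;
}

Bit 11 of the low part (0xEEF) is set, so the high part becomes 0xDEADC = 912092 rather than 0xDEADB; the folded form keeps exactly the same pair, with the -273 applied by the lw/sw itself instead of a separate addi.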
@@ -17,17 +17,13 @@ define i64 @load_i64(i64 *%a) nounwind {

@val64 = local_unnamed_addr global i64 2863311530, align 8

; TODO: codegen on this should be improved. It shouldn't be necessary to
; generate two addi
define i64 @load_i64_global() nounwind {
; RV32I-LABEL: load_i64_global:
; RV32I: # %bb.0:
; RV32I-NEXT: lui a0, %hi(val64)
; RV32I-NEXT: addi a0, a0, %lo(val64)
; RV32I-NEXT: lw a0, 0(a0)
; RV32I-NEXT: lw a0, %lo(val64)(a0)
; RV32I-NEXT: lui a1, %hi(val64+4)
; RV32I-NEXT: addi a1, a1, %lo(val64+4)
; RV32I-NEXT: lw a1, 0(a1)
; RV32I-NEXT: lw a1, %lo(val64+4)(a1)
; RV32I-NEXT: ret
  %1 = load i64, i64* @val64
  ret i64 %1