
[RISCV] Fold ADDIs into load/stores with nonzero offsets

We can often fold an ADDI into the offset of load/store instructions:

   (load (addi base, off1), off2) -> (load base, off1+off2)
   (store val, (addi base, off1), off2) -> (store val, base, off1+off2)

This is possible when off1+off2 still fits in the 12-bit immediate.
We remove the previous restriction where we would never fold the ADDIs if
the load/stores had nonzero offsets. We now do the fold if the resulting
constant still fits in a 12-bit immediate, or if off1 is a variable's address
and we know, based on that variable's alignment, that off1+off2 won't overflow.
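
As a worked illustration of the first condition (a minimal sketch, not part
of the patch; fitsInt12 is a hypothetical stand-in for LLVM's isInt<12>):

   #include <cassert>
   #include <cstdint>

   // Signed 12-bit range check, equivalent to LLVM's isInt<12>().
   bool fitsInt12(int64_t V) { return V >= -2048 && V <= 2047; }

   int main() {
     assert(fitsInt12(2040 + 4));   // (load (addi base, 2040), 4) may fold
     assert(!fitsInt12(2040 + 16)); // 2056 overflows the field, so no fold
   }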

Differential Revision: https://reviews.llvm.org/D79690
Luís Marques 2020-06-24 13:53:27 +01:00
parent 3b7308f12c
commit 0a0548042f
8 changed files with 3325 additions and 524 deletions


@@ -14,6 +14,7 @@
 #include "MCTargetDesc/RISCVMCTargetDesc.h"
 #include "Utils/RISCVMatInt.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Support/Alignment.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
@@ -160,8 +161,9 @@ bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) {
 }
 
 // Merge an ADDI into the offset of a load/store instruction where possible.
-// (load (add base, off), 0) -> (load base, off)
-// (store val, (add base, off)) -> (store val, base, off)
+// (load (addi base, off1), off2) -> (load base, off1+off2)
+// (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
+// This is possible when off1+off2 fits a 12-bit immediate.
 void RISCVDAGToDAGISel::doPeepholeLoadStoreADDI() {
   SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
   ++Position;
@@ -202,10 +204,7 @@ void RISCVDAGToDAGISel::doPeepholeLoadStoreADDI() {
       break;
     }
 
-    // Currently, the load/store offset must be 0 to be considered for this
-    // peephole optimisation.
-    if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)) ||
-        N->getConstantOperandVal(OffsetOpIdx) != 0)
+    if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)))
       continue;
 
     SDValue Base = N->getOperand(BaseOpIdx);
@@ -215,18 +214,39 @@ void RISCVDAGToDAGISel::doPeepholeLoadStoreADDI() {
       continue;
 
     SDValue ImmOperand = Base.getOperand(1);
+    uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx);
 
     if (auto Const = dyn_cast<ConstantSDNode>(ImmOperand)) {
-      ImmOperand = CurDAG->getTargetConstant(
-          Const->getSExtValue(), SDLoc(ImmOperand), ImmOperand.getValueType());
+      int64_t Offset1 = Const->getSExtValue();
+      int64_t CombinedOffset = Offset1 + Offset2;
+      if (!isInt<12>(CombinedOffset))
+        continue;
+      ImmOperand = CurDAG->getTargetConstant(CombinedOffset, SDLoc(ImmOperand),
+                                             ImmOperand.getValueType());
     } else if (auto GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) {
+      // If the off1 in (addi base, off1) is a global variable's address (its
+      // low part, really), then we can rely on the alignment of that variable
+      // to provide a margin of safety before off1 can overflow the 12 bits.
+      // Check if off2 falls within that margin; if so off1+off2 can't overflow.
+      const DataLayout &DL = CurDAG->getDataLayout();
+      Align Alignment = GA->getGlobal()->getPointerAlignment(DL);
+      if (Offset2 != 0 && Alignment <= Offset2)
+        continue;
+      int64_t Offset1 = GA->getOffset();
+      int64_t CombinedOffset = Offset1 + Offset2;
       ImmOperand = CurDAG->getTargetGlobalAddress(
           GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(),
-          GA->getOffset(), GA->getTargetFlags());
+          CombinedOffset, GA->getTargetFlags());
     } else if (auto CP = dyn_cast<ConstantPoolSDNode>(ImmOperand)) {
+      // Ditto.
+      Align Alignment = CP->getAlign();
+      if (Offset2 != 0 && Alignment <= Offset2)
+        continue;
+      int64_t Offset1 = CP->getOffset();
+      int64_t CombinedOffset = Offset1 + Offset2;
       ImmOperand = CurDAG->getTargetConstantPool(
           CP->getConstVal(), ImmOperand.getValueType(), CP->getAlign(),
-          CP->getOffset(), CP->getTargetFlags());
+          CombinedOffset, CP->getTargetFlags());
     } else {
       continue;
     }
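
A standalone sketch of the alignment argument used in the GlobalAddress and
ConstantPool cases above (an illustrative helper, not the patch's API): if a
symbol is aligned to A bytes, its low 12 bits are a multiple of A, so adding
any off2 < A cannot carry out of the 12-bit field. That is exactly what the
"Offset2 != 0 && Alignment <= Offset2" guards reject.

   #include <cassert>
   #include <cstdint>

   // The fold is safe when off2 is zero or falls strictly inside the
   // symbol's alignment padding.
   bool offsetWithinAlignmentMargin(uint64_t Alignment, uint64_t Off2) {
     return Off2 == 0 || Off2 < Alignment;
   }

   int main() {
     // For an 8-byte-aligned global g, %lo(g) is a multiple of 8, so
     // %lo(g)+4 cannot overflow: folding to lw a1, %lo(g+4)(a1) is safe.
     assert(offsetWithinAlignmentMargin(/*Alignment=*/8, /*Off2=*/4));
     // off2 == 8 could carry past the margin (e.g. if %lo(g) == 2040,
     // 2040 + 8 == 2048 > 2047), so the peephole conservatively bails.
     assert(!offsetWithinAlignmentMargin(/*Alignment=*/8, /*Off2=*/8));
   }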

(3 file diffs suppressed because they are too large)


@@ -23,8 +23,7 @@ define i64 @load_g_0() nounwind {
 ; RV32I: # %bb.0: # %entry
 ; RV32I-NEXT: lui a1, %hi(g_0)
 ; RV32I-NEXT: lw a0, %lo(g_0)(a1)
-; RV32I-NEXT: addi a1, a1, %lo(g_0)
-; RV32I-NEXT: lw a1, 4(a1)
+; RV32I-NEXT: lw a1, %lo(g_0+4)(a1)
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: load_g_0:
@@ -99,8 +98,7 @@ define i64 @load_g_8() nounwind {
 ; RV32I: # %bb.0: # %entry
 ; RV32I-NEXT: lui a1, %hi(g_8)
 ; RV32I-NEXT: lw a0, %lo(g_8)(a1)
-; RV32I-NEXT: addi a1, a1, %lo(g_8)
-; RV32I-NEXT: lw a1, 4(a1)
+; RV32I-NEXT: lw a1, %lo(g_8+4)(a1)
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: load_g_8:
@@ -118,8 +116,7 @@ define i64 @load_g_16() nounwind {
 ; RV32I: # %bb.0: # %entry
 ; RV32I-NEXT: lui a1, %hi(g_16)
 ; RV32I-NEXT: lw a0, %lo(g_16)(a1)
-; RV32I-NEXT: addi a1, a1, %lo(g_16)
-; RV32I-NEXT: lw a1, 4(a1)
+; RV32I-NEXT: lw a1, %lo(g_16+4)(a1)
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: load_g_16:
@@ -155,9 +152,8 @@ define void @store_g_8() nounwind {
 ; RV32I-LABEL: store_g_8:
 ; RV32I: # %bb.0: # %entry
 ; RV32I-NEXT: lui a0, %hi(g_8)
+; RV32I-NEXT: sw zero, %lo(g_8+4)(a0)
 ; RV32I-NEXT: sw zero, %lo(g_8)(a0)
-; RV32I-NEXT: addi a0, a0, %lo(g_8)
-; RV32I-NEXT: sw zero, 4(a0)
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: store_g_8:
@@ -197,15 +193,14 @@ entry:
 define i64 @load_ga_16() nounwind {
 ; RV32I-LABEL: load_ga_16:
 ; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: lui a0, %hi(ga_16)
-; RV32I-NEXT: addi a1, a0, %lo(ga_16)
-; RV32I-NEXT: lw a0, 8(a1)
-; RV32I-NEXT: lw a1, 12(a1)
+; RV32I-NEXT: lui a1, %hi(ga_16)
+; RV32I-NEXT: lw a0, %lo(ga_16+8)(a1)
+; RV32I-NEXT: lw a1, %lo(ga_16+12)(a1)
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: load_ga_16:
 ; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: lui a0, %hi(ga_16+8)
+; RV64I-NEXT: lui a0, %hi(ga_16)
 ; RV64I-NEXT: ld a0, %lo(ga_16+8)(a0)
 ; RV64I-NEXT: ret
 entry:
@@ -245,8 +240,7 @@ define i64 @load_tl_8() nounwind {
 ; RV32I-NEXT: lui a0, %tprel_hi(tl_8)
 ; RV32I-NEXT: add a1, a0, tp, %tprel_add(tl_8)
 ; RV32I-NEXT: lw a0, %tprel_lo(tl_8)(a1)
-; RV32I-NEXT: addi a1, a1, %tprel_lo(tl_8)
-; RV32I-NEXT: lw a1, 4(a1)
+; RV32I-NEXT: lw a1, %tprel_lo(tl_8+4)(a1)
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: load_tl_8:


@@ -14,27 +14,25 @@ define i32 @test_load_and_cmp() nounwind {
 ; RV32I-NEXT: addi sp, sp, -48
 ; RV32I-NEXT: sw ra, 44(sp)
 ; RV32I-NEXT: lui a0, %hi(x)
-; RV32I-NEXT: addi a1, a0, %lo(x)
-; RV32I-NEXT: lw a6, 4(a1)
-; RV32I-NEXT: lw a7, 8(a1)
-; RV32I-NEXT: lw a1, 12(a1)
-; RV32I-NEXT: lw a0, %lo(x)(a0)
+; RV32I-NEXT: lw a6, %lo(x)(a0)
+; RV32I-NEXT: lw a7, %lo(x+4)(a0)
+; RV32I-NEXT: lw a3, %lo(x+8)(a0)
+; RV32I-NEXT: lw a0, %lo(x+12)(a0)
 ; RV32I-NEXT: lui a4, %hi(y)
-; RV32I-NEXT: addi a5, a4, %lo(y)
-; RV32I-NEXT: lw a2, 4(a5)
-; RV32I-NEXT: lw a3, 8(a5)
-; RV32I-NEXT: lw a5, 12(a5)
-; RV32I-NEXT: lw a4, %lo(y)(a4)
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a0, 24(sp)
-; RV32I-NEXT: sw a5, 20(sp)
-; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: lw a5, %lo(y)(a4)
+; RV32I-NEXT: lw a2, %lo(y+4)(a4)
+; RV32I-NEXT: lw a1, %lo(y+8)(a4)
+; RV32I-NEXT: lw a4, %lo(y+12)(a4)
+; RV32I-NEXT: sw a4, 20(sp)
+; RV32I-NEXT: sw a1, 16(sp)
 ; RV32I-NEXT: sw a2, 12(sp)
-; RV32I-NEXT: sw a1, 36(sp)
-; RV32I-NEXT: sw a7, 32(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a0, 36(sp)
+; RV32I-NEXT: sw a3, 32(sp)
+; RV32I-NEXT: sw a7, 28(sp)
 ; RV32I-NEXT: addi a0, sp, 24
 ; RV32I-NEXT: addi a1, sp, 8
-; RV32I-NEXT: sw a6, 28(sp)
+; RV32I-NEXT: sw a6, 24(sp)
 ; RV32I-NEXT: call __netf2
 ; RV32I-NEXT: snez a0, a0
 ; RV32I-NEXT: lw ra, 44(sp)
@@ -53,28 +51,26 @@ define i32 @test_add_and_fptosi() nounwind {
 ; RV32I-NEXT: addi sp, sp, -80
 ; RV32I-NEXT: sw ra, 76(sp)
 ; RV32I-NEXT: lui a0, %hi(x)
-; RV32I-NEXT: addi a1, a0, %lo(x)
-; RV32I-NEXT: lw a6, 4(a1)
-; RV32I-NEXT: lw a7, 8(a1)
-; RV32I-NEXT: lw a1, 12(a1)
-; RV32I-NEXT: lw a0, %lo(x)(a0)
+; RV32I-NEXT: lw a6, %lo(x)(a0)
+; RV32I-NEXT: lw a7, %lo(x+4)(a0)
+; RV32I-NEXT: lw a2, %lo(x+8)(a0)
+; RV32I-NEXT: lw a0, %lo(x+12)(a0)
 ; RV32I-NEXT: lui a4, %hi(y)
-; RV32I-NEXT: addi a5, a4, %lo(y)
-; RV32I-NEXT: lw a3, 4(a5)
-; RV32I-NEXT: lw a2, 8(a5)
-; RV32I-NEXT: lw a5, 12(a5)
-; RV32I-NEXT: lw a4, %lo(y)(a4)
-; RV32I-NEXT: sw a4, 24(sp)
-; RV32I-NEXT: sw a0, 40(sp)
-; RV32I-NEXT: sw a5, 36(sp)
-; RV32I-NEXT: sw a2, 32(sp)
+; RV32I-NEXT: lw a5, %lo(y)(a4)
+; RV32I-NEXT: lw a3, %lo(y+4)(a4)
+; RV32I-NEXT: lw a1, %lo(y+8)(a4)
+; RV32I-NEXT: lw a4, %lo(y+12)(a4)
+; RV32I-NEXT: sw a4, 36(sp)
+; RV32I-NEXT: sw a1, 32(sp)
 ; RV32I-NEXT: sw a3, 28(sp)
-; RV32I-NEXT: sw a1, 52(sp)
-; RV32I-NEXT: sw a7, 48(sp)
+; RV32I-NEXT: sw a5, 24(sp)
+; RV32I-NEXT: sw a0, 52(sp)
+; RV32I-NEXT: sw a2, 48(sp)
+; RV32I-NEXT: sw a7, 44(sp)
 ; RV32I-NEXT: addi a0, sp, 56
 ; RV32I-NEXT: addi a1, sp, 40
 ; RV32I-NEXT: addi a2, sp, 24
-; RV32I-NEXT: sw a6, 44(sp)
+; RV32I-NEXT: sw a6, 40(sp)
 ; RV32I-NEXT: call __addtf3
 ; RV32I-NEXT: lw a1, 56(sp)
 ; RV32I-NEXT: lw a0, 60(sp)


@@ -383,16 +383,13 @@ define void @foo_double() nounwind #0 {
 ; CHECK-RV32-NEXT: sw t6, 0(sp)
 ; CHECK-RV32-NEXT: lui a1, %hi(h)
 ; CHECK-RV32-NEXT: lw a0, %lo(h)(a1)
-; CHECK-RV32-NEXT: addi a1, a1, %lo(h)
-; CHECK-RV32-NEXT: lw a1, 4(a1)
+; CHECK-RV32-NEXT: lw a1, %lo(h+4)(a1)
 ; CHECK-RV32-NEXT: lui a3, %hi(i)
 ; CHECK-RV32-NEXT: lw a2, %lo(i)(a3)
-; CHECK-RV32-NEXT: addi a3, a3, %lo(i)
-; CHECK-RV32-NEXT: lw a3, 4(a3)
+; CHECK-RV32-NEXT: lw a3, %lo(i+4)(a3)
 ; CHECK-RV32-NEXT: call __adddf3
 ; CHECK-RV32-NEXT: lui a2, %hi(g)
-; CHECK-RV32-NEXT: addi a3, a2, %lo(g)
-; CHECK-RV32-NEXT: sw a1, 4(a3)
+; CHECK-RV32-NEXT: sw a1, %lo(g+4)(a2)
 ; CHECK-RV32-NEXT: sw a0, %lo(g)(a2)
 ; CHECK-RV32-NEXT: lw t6, 0(sp)
 ; CHECK-RV32-NEXT: lw t5, 4(sp)
@@ -466,16 +463,13 @@ define void @foo_double() nounwind #0 {
 ; CHECK-RV32IF-NEXT: fsw fs11, 0(sp)
 ; CHECK-RV32IF-NEXT: lui a1, %hi(h)
 ; CHECK-RV32IF-NEXT: lw a0, %lo(h)(a1)
-; CHECK-RV32IF-NEXT: addi a1, a1, %lo(h)
-; CHECK-RV32IF-NEXT: lw a1, 4(a1)
+; CHECK-RV32IF-NEXT: lw a1, %lo(h+4)(a1)
 ; CHECK-RV32IF-NEXT: lui a3, %hi(i)
 ; CHECK-RV32IF-NEXT: lw a2, %lo(i)(a3)
-; CHECK-RV32IF-NEXT: addi a3, a3, %lo(i)
-; CHECK-RV32IF-NEXT: lw a3, 4(a3)
+; CHECK-RV32IF-NEXT: lw a3, %lo(i+4)(a3)
 ; CHECK-RV32IF-NEXT: call __adddf3
 ; CHECK-RV32IF-NEXT: lui a2, %hi(g)
-; CHECK-RV32IF-NEXT: addi a3, a2, %lo(g)
-; CHECK-RV32IF-NEXT: sw a1, 4(a3)
+; CHECK-RV32IF-NEXT: sw a1, %lo(g+4)(a2)
 ; CHECK-RV32IF-NEXT: sw a0, %lo(g)(a2)
 ; CHECK-RV32IF-NEXT: flw fs11, 0(sp)
 ; CHECK-RV32IF-NEXT: flw fs10, 4(sp)
@@ -580,16 +574,13 @@ define void @foo_fp_double() nounwind #1 {
 ; CHECK-RV32-NEXT: addi s0, sp, 80
 ; CHECK-RV32-NEXT: lui a1, %hi(h)
 ; CHECK-RV32-NEXT: lw a0, %lo(h)(a1)
-; CHECK-RV32-NEXT: addi a1, a1, %lo(h)
-; CHECK-RV32-NEXT: lw a1, 4(a1)
+; CHECK-RV32-NEXT: lw a1, %lo(h+4)(a1)
 ; CHECK-RV32-NEXT: lui a3, %hi(i)
 ; CHECK-RV32-NEXT: lw a2, %lo(i)(a3)
-; CHECK-RV32-NEXT: addi a3, a3, %lo(i)
-; CHECK-RV32-NEXT: lw a3, 4(a3)
+; CHECK-RV32-NEXT: lw a3, %lo(i+4)(a3)
 ; CHECK-RV32-NEXT: call __adddf3
 ; CHECK-RV32-NEXT: lui a2, %hi(g)
-; CHECK-RV32-NEXT: addi a3, a2, %lo(g)
-; CHECK-RV32-NEXT: sw a1, 4(a3)
+; CHECK-RV32-NEXT: sw a1, %lo(g+4)(a2)
 ; CHECK-RV32-NEXT: sw a0, %lo(g)(a2)
 ; CHECK-RV32-NEXT: lw t6, 12(sp)
 ; CHECK-RV32-NEXT: lw t5, 16(sp)
@@ -666,16 +657,13 @@ define void @foo_fp_double() nounwind #1 {
 ; CHECK-RV32IF-NEXT: addi s0, sp, 208
 ; CHECK-RV32IF-NEXT: lui a1, %hi(h)
 ; CHECK-RV32IF-NEXT: lw a0, %lo(h)(a1)
-; CHECK-RV32IF-NEXT: addi a1, a1, %lo(h)
-; CHECK-RV32IF-NEXT: lw a1, 4(a1)
+; CHECK-RV32IF-NEXT: lw a1, %lo(h+4)(a1)
 ; CHECK-RV32IF-NEXT: lui a3, %hi(i)
 ; CHECK-RV32IF-NEXT: lw a2, %lo(i)(a3)
-; CHECK-RV32IF-NEXT: addi a3, a3, %lo(i)
-; CHECK-RV32IF-NEXT: lw a3, 4(a3)
+; CHECK-RV32IF-NEXT: lw a3, %lo(i+4)(a3)
 ; CHECK-RV32IF-NEXT: call __adddf3
 ; CHECK-RV32IF-NEXT: lui a2, %hi(g)
-; CHECK-RV32IF-NEXT: addi a3, a2, %lo(g)
-; CHECK-RV32IF-NEXT: sw a1, 4(a3)
+; CHECK-RV32IF-NEXT: sw a1, %lo(g+4)(a2)
 ; CHECK-RV32IF-NEXT: sw a0, %lo(g)(a2)
 ; CHECK-RV32IF-NEXT: flw fs11, 12(sp)
 ; CHECK-RV32IF-NEXT: flw fs10, 16(sp)


@@ -22,8 +22,7 @@ define i64 @load_i64_global() nounwind {
 ; RV32I: # %bb.0:
 ; RV32I-NEXT: lui a1, %hi(val64)
 ; RV32I-NEXT: lw a0, %lo(val64)(a1)
-; RV32I-NEXT: addi a1, a1, %lo(val64)
-; RV32I-NEXT: lw a1, 4(a1)
+; RV32I-NEXT: lw a1, %lo(val64+4)(a1)
 ; RV32I-NEXT: ret
 %1 = load i64, i64* @val64
 ret i64 %1