mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-18 18:42:46 +02:00
[RISCV] Fold ADDIs into load/stores with nonzero offsets
We can often fold an ADDI into the offset of load/store instructions: (load (addi base, off1), off2) -> (load base, off1+off2); (store val, (addi base, off1), off2) -> (store val, base, off1+off2). This is possible when off1+off2 continues to fit the 12-bit immediate. We remove the previous restriction where we would never fold the ADDIs if the load/stores had nonzero offsets. We now do the fold if the resulting constant still fits a 12-bit immediate, or if off1 is a variable's address and we know based on that variable's alignment that off1+off2 won't overflow. Differential Revision: https://reviews.llvm.org/D79690
This commit is contained in:
parent
3b7308f12c
commit
0a0548042f
@ -14,6 +14,7 @@
|
||||
#include "MCTargetDesc/RISCVMCTargetDesc.h"
|
||||
#include "Utils/RISCVMatInt.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/Support/Alignment.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
@ -160,8 +161,9 @@ bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) {
|
||||
}
|
||||
|
||||
// Merge an ADDI into the offset of a load/store instruction where possible.
|
||||
// (load (add base, off), 0) -> (load base, off)
|
||||
// (store val, (add base, off)) -> (store val, base, off)
|
||||
// (load (addi base, off1), off2) -> (load base, off1+off2)
|
||||
// (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
|
||||
// This is possible when off1+off2 fits a 12-bit immediate.
|
||||
void RISCVDAGToDAGISel::doPeepholeLoadStoreADDI() {
|
||||
SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
|
||||
++Position;
|
||||
@ -202,10 +204,7 @@ void RISCVDAGToDAGISel::doPeepholeLoadStoreADDI() {
|
||||
break;
|
||||
}
|
||||
|
||||
// Currently, the load/store offset must be 0 to be considered for this
|
||||
// peephole optimisation.
|
||||
if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)) ||
|
||||
N->getConstantOperandVal(OffsetOpIdx) != 0)
|
||||
if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)))
|
||||
continue;
|
||||
|
||||
SDValue Base = N->getOperand(BaseOpIdx);
|
||||
@ -215,18 +214,39 @@ void RISCVDAGToDAGISel::doPeepholeLoadStoreADDI() {
|
||||
continue;
|
||||
|
||||
SDValue ImmOperand = Base.getOperand(1);
|
||||
uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx);
|
||||
|
||||
if (auto Const = dyn_cast<ConstantSDNode>(ImmOperand)) {
|
||||
ImmOperand = CurDAG->getTargetConstant(
|
||||
Const->getSExtValue(), SDLoc(ImmOperand), ImmOperand.getValueType());
|
||||
int64_t Offset1 = Const->getSExtValue();
|
||||
int64_t CombinedOffset = Offset1 + Offset2;
|
||||
if (!isInt<12>(CombinedOffset))
|
||||
continue;
|
||||
ImmOperand = CurDAG->getTargetConstant(CombinedOffset, SDLoc(ImmOperand),
|
||||
ImmOperand.getValueType());
|
||||
} else if (auto GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) {
|
||||
// If the off1 in (addi base, off1) is a global variable's address (its
|
||||
// low part, really), then we can rely on the alignment of that variable
|
||||
// to provide a margin of safety before off1 can overflow the 12 bits.
|
||||
// Check if off2 falls within that margin; if so off1+off2 can't overflow.
|
||||
const DataLayout &DL = CurDAG->getDataLayout();
|
||||
Align Alignment = GA->getGlobal()->getPointerAlignment(DL);
|
||||
if (Offset2 != 0 && Alignment <= Offset2)
|
||||
continue;
|
||||
int64_t Offset1 = GA->getOffset();
|
||||
int64_t CombinedOffset = Offset1 + Offset2;
|
||||
ImmOperand = CurDAG->getTargetGlobalAddress(
|
||||
GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(),
|
||||
GA->getOffset(), GA->getTargetFlags());
|
||||
CombinedOffset, GA->getTargetFlags());
|
||||
} else if (auto CP = dyn_cast<ConstantPoolSDNode>(ImmOperand)) {
|
||||
// Ditto.
|
||||
Align Alignment = CP->getAlign();
|
||||
if (Offset2 != 0 && Alignment <= Offset2)
|
||||
continue;
|
||||
int64_t Offset1 = CP->getOffset();
|
||||
int64_t CombinedOffset = Offset1 + Offset2;
|
||||
ImmOperand = CurDAG->getTargetConstantPool(
|
||||
CP->getConstVal(), ImmOperand.getValueType(), CP->getAlign(),
|
||||
CP->getOffset(), CP->getTargetFlags());
|
||||
CombinedOffset, CP->getTargetFlags());
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -23,8 +23,7 @@ define i64 @load_g_0() nounwind {
|
||||
; RV32I: # %bb.0: # %entry
|
||||
; RV32I-NEXT: lui a1, %hi(g_0)
|
||||
; RV32I-NEXT: lw a0, %lo(g_0)(a1)
|
||||
; RV32I-NEXT: addi a1, a1, %lo(g_0)
|
||||
; RV32I-NEXT: lw a1, 4(a1)
|
||||
; RV32I-NEXT: lw a1, %lo(g_0+4)(a1)
|
||||
; RV32I-NEXT: ret
|
||||
;
|
||||
; RV64I-LABEL: load_g_0:
|
||||
@ -99,8 +98,7 @@ define i64 @load_g_8() nounwind {
|
||||
; RV32I: # %bb.0: # %entry
|
||||
; RV32I-NEXT: lui a1, %hi(g_8)
|
||||
; RV32I-NEXT: lw a0, %lo(g_8)(a1)
|
||||
; RV32I-NEXT: addi a1, a1, %lo(g_8)
|
||||
; RV32I-NEXT: lw a1, 4(a1)
|
||||
; RV32I-NEXT: lw a1, %lo(g_8+4)(a1)
|
||||
; RV32I-NEXT: ret
|
||||
;
|
||||
; RV64I-LABEL: load_g_8:
|
||||
@ -118,8 +116,7 @@ define i64 @load_g_16() nounwind {
|
||||
; RV32I: # %bb.0: # %entry
|
||||
; RV32I-NEXT: lui a1, %hi(g_16)
|
||||
; RV32I-NEXT: lw a0, %lo(g_16)(a1)
|
||||
; RV32I-NEXT: addi a1, a1, %lo(g_16)
|
||||
; RV32I-NEXT: lw a1, 4(a1)
|
||||
; RV32I-NEXT: lw a1, %lo(g_16+4)(a1)
|
||||
; RV32I-NEXT: ret
|
||||
;
|
||||
; RV64I-LABEL: load_g_16:
|
||||
@ -155,9 +152,8 @@ define void @store_g_8() nounwind {
|
||||
; RV32I-LABEL: store_g_8:
|
||||
; RV32I: # %bb.0: # %entry
|
||||
; RV32I-NEXT: lui a0, %hi(g_8)
|
||||
; RV32I-NEXT: sw zero, %lo(g_8+4)(a0)
|
||||
; RV32I-NEXT: sw zero, %lo(g_8)(a0)
|
||||
; RV32I-NEXT: addi a0, a0, %lo(g_8)
|
||||
; RV32I-NEXT: sw zero, 4(a0)
|
||||
; RV32I-NEXT: ret
|
||||
;
|
||||
; RV64I-LABEL: store_g_8:
|
||||
@ -197,15 +193,14 @@ entry:
|
||||
define i64 @load_ga_16() nounwind {
|
||||
; RV32I-LABEL: load_ga_16:
|
||||
; RV32I: # %bb.0: # %entry
|
||||
; RV32I-NEXT: lui a0, %hi(ga_16)
|
||||
; RV32I-NEXT: addi a1, a0, %lo(ga_16)
|
||||
; RV32I-NEXT: lw a0, 8(a1)
|
||||
; RV32I-NEXT: lw a1, 12(a1)
|
||||
; RV32I-NEXT: lui a1, %hi(ga_16)
|
||||
; RV32I-NEXT: lw a0, %lo(ga_16+8)(a1)
|
||||
; RV32I-NEXT: lw a1, %lo(ga_16+12)(a1)
|
||||
; RV32I-NEXT: ret
|
||||
;
|
||||
; RV64I-LABEL: load_ga_16:
|
||||
; RV64I: # %bb.0: # %entry
|
||||
; RV64I-NEXT: lui a0, %hi(ga_16+8)
|
||||
; RV64I-NEXT: lui a0, %hi(ga_16)
|
||||
; RV64I-NEXT: ld a0, %lo(ga_16+8)(a0)
|
||||
; RV64I-NEXT: ret
|
||||
entry:
|
||||
@ -245,8 +240,7 @@ define i64 @load_tl_8() nounwind {
|
||||
; RV32I-NEXT: lui a0, %tprel_hi(tl_8)
|
||||
; RV32I-NEXT: add a1, a0, tp, %tprel_add(tl_8)
|
||||
; RV32I-NEXT: lw a0, %tprel_lo(tl_8)(a1)
|
||||
; RV32I-NEXT: addi a1, a1, %tprel_lo(tl_8)
|
||||
; RV32I-NEXT: lw a1, 4(a1)
|
||||
; RV32I-NEXT: lw a1, %tprel_lo(tl_8+4)(a1)
|
||||
; RV32I-NEXT: ret
|
||||
;
|
||||
; RV64I-LABEL: load_tl_8:
|
||||
|
@ -14,27 +14,25 @@ define i32 @test_load_and_cmp() nounwind {
|
||||
; RV32I-NEXT: addi sp, sp, -48
|
||||
; RV32I-NEXT: sw ra, 44(sp)
|
||||
; RV32I-NEXT: lui a0, %hi(x)
|
||||
; RV32I-NEXT: addi a1, a0, %lo(x)
|
||||
; RV32I-NEXT: lw a6, 4(a1)
|
||||
; RV32I-NEXT: lw a7, 8(a1)
|
||||
; RV32I-NEXT: lw a1, 12(a1)
|
||||
; RV32I-NEXT: lw a0, %lo(x)(a0)
|
||||
; RV32I-NEXT: lw a6, %lo(x)(a0)
|
||||
; RV32I-NEXT: lw a7, %lo(x+4)(a0)
|
||||
; RV32I-NEXT: lw a3, %lo(x+8)(a0)
|
||||
; RV32I-NEXT: lw a0, %lo(x+12)(a0)
|
||||
; RV32I-NEXT: lui a4, %hi(y)
|
||||
; RV32I-NEXT: addi a5, a4, %lo(y)
|
||||
; RV32I-NEXT: lw a2, 4(a5)
|
||||
; RV32I-NEXT: lw a3, 8(a5)
|
||||
; RV32I-NEXT: lw a5, 12(a5)
|
||||
; RV32I-NEXT: lw a4, %lo(y)(a4)
|
||||
; RV32I-NEXT: sw a4, 8(sp)
|
||||
; RV32I-NEXT: sw a0, 24(sp)
|
||||
; RV32I-NEXT: sw a5, 20(sp)
|
||||
; RV32I-NEXT: sw a3, 16(sp)
|
||||
; RV32I-NEXT: lw a5, %lo(y)(a4)
|
||||
; RV32I-NEXT: lw a2, %lo(y+4)(a4)
|
||||
; RV32I-NEXT: lw a1, %lo(y+8)(a4)
|
||||
; RV32I-NEXT: lw a4, %lo(y+12)(a4)
|
||||
; RV32I-NEXT: sw a4, 20(sp)
|
||||
; RV32I-NEXT: sw a1, 16(sp)
|
||||
; RV32I-NEXT: sw a2, 12(sp)
|
||||
; RV32I-NEXT: sw a1, 36(sp)
|
||||
; RV32I-NEXT: sw a7, 32(sp)
|
||||
; RV32I-NEXT: sw a5, 8(sp)
|
||||
; RV32I-NEXT: sw a0, 36(sp)
|
||||
; RV32I-NEXT: sw a3, 32(sp)
|
||||
; RV32I-NEXT: sw a7, 28(sp)
|
||||
; RV32I-NEXT: addi a0, sp, 24
|
||||
; RV32I-NEXT: addi a1, sp, 8
|
||||
; RV32I-NEXT: sw a6, 28(sp)
|
||||
; RV32I-NEXT: sw a6, 24(sp)
|
||||
; RV32I-NEXT: call __netf2
|
||||
; RV32I-NEXT: snez a0, a0
|
||||
; RV32I-NEXT: lw ra, 44(sp)
|
||||
@ -53,28 +51,26 @@ define i32 @test_add_and_fptosi() nounwind {
|
||||
; RV32I-NEXT: addi sp, sp, -80
|
||||
; RV32I-NEXT: sw ra, 76(sp)
|
||||
; RV32I-NEXT: lui a0, %hi(x)
|
||||
; RV32I-NEXT: addi a1, a0, %lo(x)
|
||||
; RV32I-NEXT: lw a6, 4(a1)
|
||||
; RV32I-NEXT: lw a7, 8(a1)
|
||||
; RV32I-NEXT: lw a1, 12(a1)
|
||||
; RV32I-NEXT: lw a0, %lo(x)(a0)
|
||||
; RV32I-NEXT: lw a6, %lo(x)(a0)
|
||||
; RV32I-NEXT: lw a7, %lo(x+4)(a0)
|
||||
; RV32I-NEXT: lw a2, %lo(x+8)(a0)
|
||||
; RV32I-NEXT: lw a0, %lo(x+12)(a0)
|
||||
; RV32I-NEXT: lui a4, %hi(y)
|
||||
; RV32I-NEXT: addi a5, a4, %lo(y)
|
||||
; RV32I-NEXT: lw a3, 4(a5)
|
||||
; RV32I-NEXT: lw a2, 8(a5)
|
||||
; RV32I-NEXT: lw a5, 12(a5)
|
||||
; RV32I-NEXT: lw a4, %lo(y)(a4)
|
||||
; RV32I-NEXT: sw a4, 24(sp)
|
||||
; RV32I-NEXT: sw a0, 40(sp)
|
||||
; RV32I-NEXT: sw a5, 36(sp)
|
||||
; RV32I-NEXT: sw a2, 32(sp)
|
||||
; RV32I-NEXT: lw a5, %lo(y)(a4)
|
||||
; RV32I-NEXT: lw a3, %lo(y+4)(a4)
|
||||
; RV32I-NEXT: lw a1, %lo(y+8)(a4)
|
||||
; RV32I-NEXT: lw a4, %lo(y+12)(a4)
|
||||
; RV32I-NEXT: sw a4, 36(sp)
|
||||
; RV32I-NEXT: sw a1, 32(sp)
|
||||
; RV32I-NEXT: sw a3, 28(sp)
|
||||
; RV32I-NEXT: sw a1, 52(sp)
|
||||
; RV32I-NEXT: sw a7, 48(sp)
|
||||
; RV32I-NEXT: sw a5, 24(sp)
|
||||
; RV32I-NEXT: sw a0, 52(sp)
|
||||
; RV32I-NEXT: sw a2, 48(sp)
|
||||
; RV32I-NEXT: sw a7, 44(sp)
|
||||
; RV32I-NEXT: addi a0, sp, 56
|
||||
; RV32I-NEXT: addi a1, sp, 40
|
||||
; RV32I-NEXT: addi a2, sp, 24
|
||||
; RV32I-NEXT: sw a6, 44(sp)
|
||||
; RV32I-NEXT: sw a6, 40(sp)
|
||||
; RV32I-NEXT: call __addtf3
|
||||
; RV32I-NEXT: lw a1, 56(sp)
|
||||
; RV32I-NEXT: lw a0, 60(sp)
|
||||
|
@ -383,16 +383,13 @@ define void @foo_double() nounwind #0 {
|
||||
; CHECK-RV32-NEXT: sw t6, 0(sp)
|
||||
; CHECK-RV32-NEXT: lui a1, %hi(h)
|
||||
; CHECK-RV32-NEXT: lw a0, %lo(h)(a1)
|
||||
; CHECK-RV32-NEXT: addi a1, a1, %lo(h)
|
||||
; CHECK-RV32-NEXT: lw a1, 4(a1)
|
||||
; CHECK-RV32-NEXT: lw a1, %lo(h+4)(a1)
|
||||
; CHECK-RV32-NEXT: lui a3, %hi(i)
|
||||
; CHECK-RV32-NEXT: lw a2, %lo(i)(a3)
|
||||
; CHECK-RV32-NEXT: addi a3, a3, %lo(i)
|
||||
; CHECK-RV32-NEXT: lw a3, 4(a3)
|
||||
; CHECK-RV32-NEXT: lw a3, %lo(i+4)(a3)
|
||||
; CHECK-RV32-NEXT: call __adddf3
|
||||
; CHECK-RV32-NEXT: lui a2, %hi(g)
|
||||
; CHECK-RV32-NEXT: addi a3, a2, %lo(g)
|
||||
; CHECK-RV32-NEXT: sw a1, 4(a3)
|
||||
; CHECK-RV32-NEXT: sw a1, %lo(g+4)(a2)
|
||||
; CHECK-RV32-NEXT: sw a0, %lo(g)(a2)
|
||||
; CHECK-RV32-NEXT: lw t6, 0(sp)
|
||||
; CHECK-RV32-NEXT: lw t5, 4(sp)
|
||||
@ -466,16 +463,13 @@ define void @foo_double() nounwind #0 {
|
||||
; CHECK-RV32IF-NEXT: fsw fs11, 0(sp)
|
||||
; CHECK-RV32IF-NEXT: lui a1, %hi(h)
|
||||
; CHECK-RV32IF-NEXT: lw a0, %lo(h)(a1)
|
||||
; CHECK-RV32IF-NEXT: addi a1, a1, %lo(h)
|
||||
; CHECK-RV32IF-NEXT: lw a1, 4(a1)
|
||||
; CHECK-RV32IF-NEXT: lw a1, %lo(h+4)(a1)
|
||||
; CHECK-RV32IF-NEXT: lui a3, %hi(i)
|
||||
; CHECK-RV32IF-NEXT: lw a2, %lo(i)(a3)
|
||||
; CHECK-RV32IF-NEXT: addi a3, a3, %lo(i)
|
||||
; CHECK-RV32IF-NEXT: lw a3, 4(a3)
|
||||
; CHECK-RV32IF-NEXT: lw a3, %lo(i+4)(a3)
|
||||
; CHECK-RV32IF-NEXT: call __adddf3
|
||||
; CHECK-RV32IF-NEXT: lui a2, %hi(g)
|
||||
; CHECK-RV32IF-NEXT: addi a3, a2, %lo(g)
|
||||
; CHECK-RV32IF-NEXT: sw a1, 4(a3)
|
||||
; CHECK-RV32IF-NEXT: sw a1, %lo(g+4)(a2)
|
||||
; CHECK-RV32IF-NEXT: sw a0, %lo(g)(a2)
|
||||
; CHECK-RV32IF-NEXT: flw fs11, 0(sp)
|
||||
; CHECK-RV32IF-NEXT: flw fs10, 4(sp)
|
||||
@ -580,16 +574,13 @@ define void @foo_fp_double() nounwind #1 {
|
||||
; CHECK-RV32-NEXT: addi s0, sp, 80
|
||||
; CHECK-RV32-NEXT: lui a1, %hi(h)
|
||||
; CHECK-RV32-NEXT: lw a0, %lo(h)(a1)
|
||||
; CHECK-RV32-NEXT: addi a1, a1, %lo(h)
|
||||
; CHECK-RV32-NEXT: lw a1, 4(a1)
|
||||
; CHECK-RV32-NEXT: lw a1, %lo(h+4)(a1)
|
||||
; CHECK-RV32-NEXT: lui a3, %hi(i)
|
||||
; CHECK-RV32-NEXT: lw a2, %lo(i)(a3)
|
||||
; CHECK-RV32-NEXT: addi a3, a3, %lo(i)
|
||||
; CHECK-RV32-NEXT: lw a3, 4(a3)
|
||||
; CHECK-RV32-NEXT: lw a3, %lo(i+4)(a3)
|
||||
; CHECK-RV32-NEXT: call __adddf3
|
||||
; CHECK-RV32-NEXT: lui a2, %hi(g)
|
||||
; CHECK-RV32-NEXT: addi a3, a2, %lo(g)
|
||||
; CHECK-RV32-NEXT: sw a1, 4(a3)
|
||||
; CHECK-RV32-NEXT: sw a1, %lo(g+4)(a2)
|
||||
; CHECK-RV32-NEXT: sw a0, %lo(g)(a2)
|
||||
; CHECK-RV32-NEXT: lw t6, 12(sp)
|
||||
; CHECK-RV32-NEXT: lw t5, 16(sp)
|
||||
@ -666,16 +657,13 @@ define void @foo_fp_double() nounwind #1 {
|
||||
; CHECK-RV32IF-NEXT: addi s0, sp, 208
|
||||
; CHECK-RV32IF-NEXT: lui a1, %hi(h)
|
||||
; CHECK-RV32IF-NEXT: lw a0, %lo(h)(a1)
|
||||
; CHECK-RV32IF-NEXT: addi a1, a1, %lo(h)
|
||||
; CHECK-RV32IF-NEXT: lw a1, 4(a1)
|
||||
; CHECK-RV32IF-NEXT: lw a1, %lo(h+4)(a1)
|
||||
; CHECK-RV32IF-NEXT: lui a3, %hi(i)
|
||||
; CHECK-RV32IF-NEXT: lw a2, %lo(i)(a3)
|
||||
; CHECK-RV32IF-NEXT: addi a3, a3, %lo(i)
|
||||
; CHECK-RV32IF-NEXT: lw a3, 4(a3)
|
||||
; CHECK-RV32IF-NEXT: lw a3, %lo(i+4)(a3)
|
||||
; CHECK-RV32IF-NEXT: call __adddf3
|
||||
; CHECK-RV32IF-NEXT: lui a2, %hi(g)
|
||||
; CHECK-RV32IF-NEXT: addi a3, a2, %lo(g)
|
||||
; CHECK-RV32IF-NEXT: sw a1, 4(a3)
|
||||
; CHECK-RV32IF-NEXT: sw a1, %lo(g+4)(a2)
|
||||
; CHECK-RV32IF-NEXT: sw a0, %lo(g)(a2)
|
||||
; CHECK-RV32IF-NEXT: flw fs11, 12(sp)
|
||||
; CHECK-RV32IF-NEXT: flw fs10, 16(sp)
|
||||
|
@ -22,8 +22,7 @@ define i64 @load_i64_global() nounwind {
|
||||
; RV32I: # %bb.0:
|
||||
; RV32I-NEXT: lui a1, %hi(val64)
|
||||
; RV32I-NEXT: lw a0, %lo(val64)(a1)
|
||||
; RV32I-NEXT: addi a1, a1, %lo(val64)
|
||||
; RV32I-NEXT: lw a1, 4(a1)
|
||||
; RV32I-NEXT: lw a1, %lo(val64+4)(a1)
|
||||
; RV32I-NEXT: ret
|
||||
%1 = load i64, i64* @val64
|
||||
ret i64 %1
|
||||
|
Loading…
Reference in New Issue
Block a user