1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00

AMDGPU: Materialize frame index before add

It isn't generally safe to fold the frame index
directly into the operand since it will possibly
not be an inline immediate after it is expanded.

This surprisingly seems to produce better code, since
the FI doesn't prevent folding other immediate operands.

llvm-svn: 288185
This commit is contained in:
Matt Arsenault 2016-11-29 19:20:48 +00:00
parent a89faeec9c
commit 3c5076a42d
2 changed files with 10 additions and 4 deletions

View File

@ -245,12 +245,17 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
.addImm(Offset);
BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
.addFrameIndex(FrameIdx);
BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_ADD_I32_e64), BaseReg)
.addReg(UnusedCarry, RegState::Define | RegState::Dead)
.addReg(OffsetReg, RegState::Kill)
.addFrameIndex(FrameIdx);
.addReg(FIReg);
}
void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,

View File

@ -7,15 +7,16 @@
;
; CHECK-LABEL: {{^}}main:
; CHECK: v_mov_b32_e32 [[BASE_FI:v[0-9]+]], 0{{$}}
; FIXME: add 0?
; CHECK-DAG: s_movk_i32 [[K0:s[0-9]+]], 0x140
; CHECK-DAG: v_add_i32_e64 [[ADD_K0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, [[K0]], 0
; CHECK-DAG: v_add_i32_e32 [[ADD_K0:v[0-9]+]], vcc, 0x140, [[BASE_FI]]
; CHECK-DAG: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
; CHECK-DAG: buffer_store_dword {{v[0-9]+}}, [[ADD_K0]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
; CHECK-DAG: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, 0x200, [[BYTES]]
; CHECK-DAG: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, 0, [[BYTES]]
; CHECK-DAG: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, [[BASE_FI]], [[BYTES]]
; CHECK: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
; CHECK: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen