mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 19:52:54 +01:00
[AMDGPU] Do not allow register coalescer to create big superregs
Limit the register coalescer by not allowing it to artificially increase the size of registers beyond a dword. Such super-registers are in fact register sequences and not distinct HW registers. With more super-regs we would need to allocate adjacent registers and constrain regalloc more than needed. Moreover, our super-registers are overlapping. For instance we have VGPR0_VGPR1_VGPR2, VGPR1_VGPR2_VGPR3, VGPR2_VGPR3_VGPR4 etc., which complicates register allocation even more, resulting in excessive spilling. Differential Revision: https://reviews.llvm.org/D28782 llvm-svn: 292413
This commit is contained in:
parent
19a6ba7a53
commit
3a97f30b01
@ -1474,3 +1474,23 @@ bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
|
|||||||
unsigned Reg) const {
|
unsigned Reg) const {
|
||||||
return hasVGPRs(getRegClassForReg(MRI, Reg));
|
return hasVGPRs(getRegClassForReg(MRI, Reg));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Coalescer hook: decide whether joining a register of class \p SrcRC with
/// one of class \p DstRC into a single register of class \p NewRC is allowed.
///
/// Wide AMDGPU "registers" are really tuples of adjacent dword registers, so
/// growing them forces the allocator to find longer runs of neighbouring
/// registers. The policy implemented here is:
///   * if either input class is a dword (size 4) or smaller, always allow;
///   * otherwise allow only when the merged class is no larger than at least
///     one of the two input classes.
///
/// \p MI, \p SubReg and \p DstSubReg are accepted but not consulted.
///
/// \returns true when the coalescing is permitted.
bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
                                    const TargetRegisterClass *SrcRC,
                                    unsigned SubReg,
                                    const TargetRegisterClass *DstRC,
                                    unsigned DstSubReg,
                                    const TargetRegisterClass *NewRC) const {
  const unsigned SrcBytes = SrcRC->getSize();
  const unsigned DstBytes = DstRC->getSize();
  const unsigned MergedBytes = NewRC->getSize();

  // Anything involving a dword-sized (or smaller) class is always fine.
  const bool InvolvesDword = SrcBytes <= 4 || DstBytes <= 4;
  if (InvolvesDword)
    return true;

  // Both inputs are tuples: refuse to build a tuple wider than both of them,
  // since that would constrain register allocation more than necessary.
  if (MergedBytes <= DstBytes)
    return true;
  return MergedBytes <= SrcBytes;
}
|
||||||
|
@ -264,6 +264,13 @@ public:
|
|||||||
ArrayRef<int16_t> getRegSplitParts(const TargetRegisterClass *RC,
|
ArrayRef<int16_t> getRegSplitParts(const TargetRegisterClass *RC,
|
||||||
unsigned EltSize) const;
|
unsigned EltSize) const;
|
||||||
|
|
||||||
|
/// Override of the register-coalescer hook. Returns true when merging a
/// register of class \p SrcRC with one of class \p DstRC into class \p NewRC
/// is allowed. The implementation always allows it when either input class
/// has size 4 or less, and otherwise only when \p NewRC is no larger than at
/// least one of the two input classes.
bool shouldCoalesce(MachineInstr *MI,
|
||||||
|
const TargetRegisterClass *SrcRC,
|
||||||
|
unsigned SubReg,
|
||||||
|
const TargetRegisterClass *DstRC,
|
||||||
|
unsigned DstSubReg,
|
||||||
|
const TargetRegisterClass *NewRC) const override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void buildSpillLoadStore(MachineBasicBlock::iterator MI,
|
void buildSpillLoadStore(MachineBasicBlock::iterator MI,
|
||||||
unsigned LoadStoreOp,
|
unsigned LoadStoreOp,
|
||||||
|
@ -399,15 +399,15 @@ define void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x
|
|||||||
; XVI-NOT: v_cvt_f32_f16
|
; XVI-NOT: v_cvt_f32_f16
|
||||||
|
|
||||||
; GCN: buffer_load_dwordx2 v{{\[}}[[IN_LO:[0-9]+]]:[[IN_HI:[0-9]+]]
|
; GCN: buffer_load_dwordx2 v{{\[}}[[IN_LO:[0-9]+]]:[[IN_HI:[0-9]+]]
|
||||||
; VI: v_lshrrev_b32_e32 [[Y16:v[0-9]+]], 16, v[[IN_LO]]
|
; VI-DAG: v_lshrrev_b32_e32 [[Y16:v[0-9]+]], 16, v[[IN_LO]]
|
||||||
; GCN: v_cvt_f32_f16_e32 [[Z32:v[0-9]+]], v[[IN_HI]]
|
; GCN-DAG: v_cvt_f32_f16_e32 [[Z32:v[0-9]+]], v[[IN_HI]]
|
||||||
; GCN: v_cvt_f32_f16_e32 [[X32:v[0-9]+]], v[[IN_LO]]
|
; GCN-DAG: v_cvt_f32_f16_e32 [[X32:v[0-9]+]], v[[IN_LO]]
|
||||||
; SI: v_lshrrev_b32_e32 [[Y16:v[0-9]+]], 16, v[[IN_LO]]
|
; SI: v_lshrrev_b32_e32 [[Y16:v[0-9]+]], 16, v[[IN_LO]]
|
||||||
; GCN: v_cvt_f32_f16_e32 [[Y32:v[0-9]+]], [[Y16]]
|
; GCN-DAG: v_cvt_f32_f16_e32 [[Y32:v[0-9]+]], [[Y16]]
|
||||||
|
|
||||||
; GCN: v_cvt_f64_f32_e32 [[Z:v\[[0-9]+:[0-9]+\]]], [[Z32]]
|
; GCN-DAG: v_cvt_f64_f32_e32 [[Z:v\[[0-9]+:[0-9]+\]]], [[Z32]]
|
||||||
; GCN: v_cvt_f64_f32_e32 v{{\[}}[[XLO:[0-9]+]]:{{[0-9]+}}], [[X32]]
|
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[XLO:[0-9]+]]:{{[0-9]+}}], [[X32]]
|
||||||
; GCN: v_cvt_f64_f32_e32 v[{{[0-9]+}}:[[YHI:[0-9]+]]{{\]}}, [[Y32]]
|
; GCN-DAG: v_cvt_f64_f32_e32 v[{{[0-9]+}}:[[YHI:[0-9]+]]{{\]}}, [[Y32]]
|
||||||
; GCN-NOT: v_cvt_f64_f32_e32
|
; GCN-NOT: v_cvt_f64_f32_e32
|
||||||
|
|
||||||
; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[XLO]]:[[YHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
|
; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[XLO]]:[[YHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
|
||||||
|
71
test/CodeGen/AMDGPU/limit-coalesce.mir
Normal file
71
test/CodeGen/AMDGPU/limit-coalesce.mir
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
# RUN: llc -march=amdgcn -run-pass simple-register-coalescing -o - %s | FileCheck %s
|
||||||
|
|
||||||
|
# Check that the coalescer does not create a wider register tuple than in the source
|
||||||
|
|
||||||
|
# CHECK: - { id: 2, class: vreg_64 }
|
||||||
|
# CHECK: - { id: 3, class: vreg_64 }
|
||||||
|
# CHECK: - { id: 4, class: vreg_64 }
|
||||||
|
# CHECK: - { id: 5, class: vreg_96 }
|
||||||
|
# CHECK: - { id: 6, class: vreg_96 }
|
||||||
|
# CHECK: - { id: 7, class: vreg_128 }
|
||||||
|
# CHECK: - { id: 8, class: vreg_128 }
|
||||||
|
# No more registers shall be defined
|
||||||
|
# CHECK-NEXT: liveins:
|
||||||
|
# CHECK: FLAT_STORE_DWORDX2 %vgpr0_vgpr1, %4,
|
||||||
|
# CHECK: FLAT_STORE_DWORDX3 %vgpr0_vgpr1, %6,
|
||||||
|
|
||||||
|
---
|
||||||
|
name: main
|
||||||
|
alignment: 0
|
||||||
|
exposesReturnsTwice: false
|
||||||
|
legalized: false
|
||||||
|
regBankSelected: false
|
||||||
|
selected: false
|
||||||
|
tracksRegLiveness: true
|
||||||
|
registers:
|
||||||
|
- { id: 1, class: sreg_32_xm0, preferred-register: '%1' }
|
||||||
|
- { id: 2, class: vreg_64, preferred-register: '%2' }
|
||||||
|
- { id: 3, class: vreg_64 }
|
||||||
|
- { id: 4, class: vreg_64 }
|
||||||
|
- { id: 5, class: vreg_64 }
|
||||||
|
- { id: 6, class: vreg_96 }
|
||||||
|
- { id: 7, class: vreg_96 }
|
||||||
|
- { id: 8, class: vreg_128 }
|
||||||
|
- { id: 9, class: vreg_128 }
|
||||||
|
liveins:
|
||||||
|
- { reg: '%sgpr6', virtual-reg: '%1' }
|
||||||
|
frameInfo:
|
||||||
|
isFrameAddressTaken: false
|
||||||
|
isReturnAddressTaken: false
|
||||||
|
hasStackMap: false
|
||||||
|
hasPatchPoint: false
|
||||||
|
stackSize: 0
|
||||||
|
offsetAdjustment: 0
|
||||||
|
maxAlignment: 0
|
||||||
|
adjustsStack: false
|
||||||
|
hasCalls: false
|
||||||
|
maxCallFrameSize: 0
|
||||||
|
hasOpaqueSPAdjustment: false
|
||||||
|
hasVAStart: false
|
||||||
|
hasMustTailInVarArgFunc: false
|
||||||
|
body: |
|
||||||
|
bb.0.entry:
|
||||||
|
liveins: %sgpr0, %vgpr0_vgpr1
|
||||||
|
|
||||||
|
%3 = IMPLICIT_DEF
|
||||||
|
undef %4.sub0 = COPY %sgpr0
|
||||||
|
%4.sub1 = COPY %3.sub0
|
||||||
|
undef %5.sub0 = COPY %4.sub1
|
||||||
|
%5.sub1 = COPY %4.sub0
|
||||||
|
FLAT_STORE_DWORDX2 %vgpr0_vgpr1, killed %5, 0, 0, 0, implicit %exec, implicit %flat_scr
|
||||||
|
|
||||||
|
%6 = IMPLICIT_DEF
|
||||||
|
undef %7.sub0_sub1 = COPY %6
|
||||||
|
%7.sub2 = COPY %3.sub0
|
||||||
|
FLAT_STORE_DWORDX3 %vgpr0_vgpr1, killed %7, 0, 0, 0, implicit %exec, implicit %flat_scr
|
||||||
|
|
||||||
|
%8 = IMPLICIT_DEF
|
||||||
|
undef %9.sub0_sub1_sub2 = COPY %8
|
||||||
|
%9.sub3 = COPY %3.sub0
|
||||||
|
FLAT_STORE_DWORDX4 %vgpr0_vgpr1, killed %9, 0, 0, 0, implicit %exec, implicit %flat_scr
|
||||||
|
...
|
Loading…
Reference in New Issue
Block a user