mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
RegUsageInfoCollector: Don't iterate all regs for every reg class
This is extremely slow on AMDGPU, which has a lot of physical registers and a lot of register classes. determineCalleeSaves, via MachineRegisterInfo::isPhysRegUsed, already added all of the super registers to the saved set. llvm-svn: 365370
This commit is contained in:
parent
e77f66c673
commit
7e7822cc8c
@ -189,42 +189,17 @@ computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF) {
|
||||
// Target will return the set of registers that it saves/restores as needed.
|
||||
SavedRegs.clear();
|
||||
TFI.determineCalleeSaves(MF, SavedRegs);
|
||||
if (SavedRegs.none())
|
||||
return;
|
||||
|
||||
// Insert subregs.
|
||||
const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF);
|
||||
for (unsigned i = 0; CSRegs[i]; ++i) {
|
||||
unsigned Reg = CSRegs[i];
|
||||
if (SavedRegs.test(Reg))
|
||||
for (MCSubRegIterator SR(Reg, &TRI, false); SR.isValid(); ++SR)
|
||||
MCPhysReg Reg = CSRegs[i];
|
||||
if (SavedRegs.test(Reg)) {
|
||||
// Save subregisters
|
||||
for (MCSubRegIterator SR(Reg, &TRI); SR.isValid(); ++SR)
|
||||
SavedRegs.set(*SR);
|
||||
}
|
||||
|
||||
// Insert any register fully saved via subregisters.
|
||||
// FIXME: Rewrite to use regunits.
|
||||
for (const TargetRegisterClass *RC : TRI.regclasses()) {
|
||||
if (!RC->CoveredBySubRegs)
|
||||
continue;
|
||||
|
||||
for (unsigned PReg = 1, PRegE = TRI.getNumRegs(); PReg < PRegE; ++PReg) {
|
||||
if (SavedRegs.test(PReg))
|
||||
continue;
|
||||
|
||||
// Check if PReg is fully covered by its subregs.
|
||||
if (!RC->contains(PReg))
|
||||
continue;
|
||||
|
||||
// Add PReg to SavedRegs if all subregs are saved.
|
||||
bool AllSubRegsSaved = true;
|
||||
bool HasAtLeastOneSubreg = false;
|
||||
for (MCSubRegIterator SR(PReg, &TRI, false); SR.isValid(); ++SR) {
|
||||
HasAtLeastOneSubreg = true;
|
||||
if (!SavedRegs.test(*SR)) {
|
||||
AllSubRegsSaved = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (AllSubRegsSaved && HasAtLeastOneSubreg)
|
||||
SavedRegs.set(PReg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
46
test/CodeGen/AMDGPU/ipra-regmask.ll
Normal file
46
test/CodeGen/AMDGPU/ipra-regmask.ll
Normal file
@ -0,0 +1,46 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -enable-ipra -print-regusage -o /dev/null 2>&1 < %s | FileCheck %s
|
||||
; Make sure the expected regmask is generated for sub/superregisters.
|
||||
|
||||
; CHECK-DAG: csr Clobbered Registers: $vgpr0 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr0_vgpr1 $vgpr0_vgpr1_vgpr2 {{$}}
|
||||
define void @csr() #0 {
|
||||
call void asm sideeffect "", "~{v0},~{v36},~{v37}"() #0
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-DAG: subregs_for_super Clobbered Registers: $vgpr0 $vgpr1 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 $vgpr0_vgpr1 $vgpr1_vgpr2 $vgpr0_vgpr1_vgpr2 $vgpr1_vgpr2_vgpr3 {{$}}
|
||||
define void @subregs_for_super() #0 {
|
||||
call void asm sideeffect "", "~{v0},~{v1}"() #0
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-DAG: clobbered_reg_with_sub Clobbered Registers: $vgpr0 $vgpr1 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 $vgpr0_vgpr1 $vgpr1_vgpr2 $vgpr0_vgpr1_vgpr2 $vgpr1_vgpr2_vgpr3 {{$}}
|
||||
define void @clobbered_reg_with_sub() #0 {
|
||||
call void asm sideeffect "", "~{v[0:1]}"() #0
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-DAG: nothing Clobbered Registers: {{$}}
|
||||
define void @nothing() #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-DAG: special_regs Clobbered Registers: $scc $m0 {{$}}
|
||||
define void @special_regs() #0 {
|
||||
call void asm sideeffect "", "~{m0},~{scc}"() #0
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-DAG: vcc Clobbered Registers: $vcc $vcc_hi $vcc_lo {{$}}
|
||||
define void @vcc() #0 {
|
||||
call void asm sideeffect "", "~{vcc}"() #0
|
||||
ret void
|
||||
}
|
||||
|
||||
@llvm.used = appending global [6 x i8*] [i8* bitcast (void ()* @csr to i8*),
|
||||
i8* bitcast (void ()* @subregs_for_super to i8*),
|
||||
i8* bitcast (void ()* @clobbered_reg_with_sub to i8*),
|
||||
i8* bitcast (void ()* @nothing to i8*),
|
||||
i8* bitcast (void ()* @special_regs to i8*),
|
||||
i8* bitcast (void ()* @vcc to i8*)]
|
||||
|
||||
attributes #0 = { nounwind }
|
Loading…
Reference in New Issue
Block a user