mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
- Clean up some crappy code which deals with coalescing of copies which look at extract_subreg / insert_subreg, etc.
- Add support for more aggressive insert_subreg coalescing.
llvm-svn: 101971
This commit is contained in:
parent
4d1724c3e8
commit
a0c4b2952f
@ -111,6 +111,12 @@ namespace llvm {
|
||||
double getScaledIntervalSize(LiveInterval& I) {
|
||||
return (1000.0 * I.getSize()) / indexes_->getIndexesLength();
|
||||
}
|
||||
|
||||
/// getFuncInstructionCount - Return the number of instructions in the
|
||||
/// current function.
|
||||
unsigned getFuncInstructionCount() {
|
||||
return indexes_->getFunctionSize();
|
||||
}
|
||||
|
||||
/// getApproximateInstructionCount - computes an estimate of the number
|
||||
/// of instructions in a given LiveInterval.
|
||||
|
@ -1168,20 +1168,44 @@ SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI,
|
||||
/// isWinToJoinCrossClass - Return true if it's profitable to coalesce
|
||||
/// two virtual registers from different register classes.
|
||||
bool
|
||||
SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned LargeReg,
|
||||
unsigned SmallReg,
|
||||
unsigned Threshold) {
|
||||
// Then make sure the intervals are *short*.
|
||||
LiveInterval &LargeInt = li_->getInterval(LargeReg);
|
||||
LiveInterval &SmallInt = li_->getInterval(SmallReg);
|
||||
unsigned LargeSize = li_->getApproximateInstructionCount(LargeInt);
|
||||
unsigned SmallSize = li_->getApproximateInstructionCount(SmallInt);
|
||||
if (LargeSize > Threshold) {
|
||||
unsigned SmallUses = std::distance(mri_->use_nodbg_begin(SmallReg),
|
||||
mri_->use_nodbg_end());
|
||||
unsigned LargeUses = std::distance(mri_->use_nodbg_begin(LargeReg),
|
||||
mri_->use_nodbg_end());
|
||||
if (SmallUses*LargeSize < LargeUses*SmallSize)
|
||||
SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned SrcReg,
|
||||
unsigned DstReg,
|
||||
const TargetRegisterClass *SrcRC,
|
||||
const TargetRegisterClass *DstRC,
|
||||
const TargetRegisterClass *NewRC) {
|
||||
unsigned NewRCCount = allocatableRCRegs_[NewRC].count();
|
||||
// This heuristic is good enough in practice, but it's obviously not *right*.
|
||||
// 4 is a magic number that works well enough for x86, ARM, etc. It filters
|
||||
// out all but the most restrictive register classes.
|
||||
if (NewRCCount > 4 ||
|
||||
// Early exit if the function is fairly small, coalesce aggressively if
|
||||
// that's the case. For really special register classes with 3 or
|
||||
// fewer registers, be a bit more careful.
|
||||
(li_->getFuncInstructionCount() / NewRCCount) < 8)
|
||||
return true;
|
||||
LiveInterval &SrcInt = li_->getInterval(SrcReg);
|
||||
LiveInterval &DstInt = li_->getInterval(DstReg);
|
||||
unsigned SrcSize = li_->getApproximateInstructionCount(SrcInt);
|
||||
unsigned DstSize = li_->getApproximateInstructionCount(DstInt);
|
||||
if (SrcSize <= NewRCCount && DstSize <= NewRCCount)
|
||||
return true;
|
||||
// Estimate *register use density*. If it doubles or more, abort.
|
||||
unsigned SrcUses = std::distance(mri_->use_nodbg_begin(SrcReg),
|
||||
mri_->use_nodbg_end());
|
||||
unsigned DstUses = std::distance(mri_->use_nodbg_begin(DstReg),
|
||||
mri_->use_nodbg_end());
|
||||
float NewDensity = ((float)(SrcUses + DstUses) / (SrcSize + DstSize)) /
|
||||
NewRCCount;
|
||||
if (SrcRC != NewRC && SrcSize > NewRCCount) {
|
||||
unsigned SrcRCCount = allocatableRCRegs_[SrcRC].count();
|
||||
float Density = ((float)SrcUses / SrcSize) / SrcRCCount;
|
||||
if (NewDensity > Density * 2.0f)
|
||||
return false;
|
||||
}
|
||||
if (DstRC != NewRC && DstSize > NewRCCount) {
|
||||
unsigned DstRCCount = allocatableRCRegs_[DstRC].count();
|
||||
float Density = ((float)DstUses / DstSize) / DstRCCount;
|
||||
if (NewDensity > Density * 2.0f)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@ -1517,10 +1541,11 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
|
||||
return false; // Not coalescable
|
||||
}
|
||||
|
||||
unsigned LargeReg = isExtSubReg ? SrcReg : DstReg;
|
||||
unsigned SmallReg = isExtSubReg ? DstReg : SrcReg;
|
||||
unsigned Limit= allocatableRCRegs_[mri_->getRegClass(SmallReg)].count();
|
||||
if (!isWinToJoinCrossClass(LargeReg, SmallReg, Limit)) {
|
||||
if (!isWinToJoinCrossClass(SrcReg, DstReg, SrcRC, DstRC, NewRC)) {
|
||||
DEBUG(dbgs() << "\tAvoid coalescing to constrainted register class: "
|
||||
<< SrcRC->getName() << "/"
|
||||
<< DstRC->getName() << " -> "
|
||||
<< NewRC->getName() << ".\n");
|
||||
Again = true; // May be possible to coalesce later.
|
||||
return false;
|
||||
}
|
||||
@ -1568,49 +1593,40 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
|
||||
}
|
||||
}
|
||||
|
||||
unsigned LargeReg = SrcReg;
|
||||
unsigned SmallReg = DstReg;
|
||||
|
||||
// Now determine the register class of the joined register.
|
||||
if (isExtSubReg) {
|
||||
if (SubIdx && DstRC && DstRC->isASubClass()) {
|
||||
// This is a move to a sub-register class. However, the source is a
|
||||
// sub-register of a larger register class. We don't know what the
|
||||
// register class should be. FIXME.
|
||||
Again = true;
|
||||
return false;
|
||||
if (!SrcIsPhys && !DstIsPhys) {
|
||||
if (isExtSubReg) {
|
||||
NewRC =
|
||||
SubIdx ? tri_->getMatchingSuperRegClass(SrcRC, DstRC, SubIdx) : SrcRC;
|
||||
} else if (isInsSubReg) {
|
||||
NewRC =
|
||||
SubIdx ? tri_->getMatchingSuperRegClass(DstRC, SrcRC, SubIdx) : DstRC;
|
||||
} else {
|
||||
NewRC = getCommonSubClass(SrcRC, DstRC);
|
||||
}
|
||||
if (!DstIsPhys && !SrcIsPhys)
|
||||
NewRC = SrcRC;
|
||||
} else if (!SrcIsPhys && !DstIsPhys) {
|
||||
NewRC = getCommonSubClass(SrcRC, DstRC);
|
||||
|
||||
if (!NewRC) {
|
||||
DEBUG(dbgs() << "\tDisjoint regclasses: "
|
||||
<< SrcRC->getName() << ", "
|
||||
<< DstRC->getName() << ".\n");
|
||||
return false; // Not coalescable.
|
||||
}
|
||||
if (DstRC->getSize() > SrcRC->getSize())
|
||||
std::swap(LargeReg, SmallReg);
|
||||
}
|
||||
|
||||
// If we are joining two virtual registers and the resulting register
|
||||
// class is more restrictive (fewer registers, smaller size), check if it's
|
||||
// worth doing the merge.
|
||||
if (!SrcIsPhys && !DstIsPhys &&
|
||||
(isExtSubReg || DstRC->isASubClass()) &&
|
||||
!isWinToJoinCrossClass(LargeReg, SmallReg,
|
||||
allocatableRCRegs_[NewRC].count())) {
|
||||
DEBUG(dbgs() << "\tSrc/Dest are different register classes: "
|
||||
<< SrcRC->getName() << "/"
|
||||
<< DstRC->getName() << " -> "
|
||||
<< NewRC->getName() << ".\n");
|
||||
// Allow the coalescer to try again in case either side gets coalesced to
|
||||
// a physical register that's compatible with the other side. e.g.
|
||||
// r1024 = MOV32to32_ r1025
|
||||
// But later r1024 is assigned EAX then r1025 may be coalesced with EAX.
|
||||
Again = true; // May be possible to coalesce later.
|
||||
return false;
|
||||
// If we are joining two virtual registers and the resulting register
|
||||
// class is more restrictive (fewer registers, smaller size), check if it's
|
||||
// worth doing the merge.
|
||||
if (!isWinToJoinCrossClass(SrcReg, DstReg, SrcRC, DstRC, NewRC)) {
|
||||
DEBUG(dbgs() << "\tAvoid coalescing to constrainted register class: "
|
||||
<< SrcRC->getName() << "/"
|
||||
<< DstRC->getName() << " -> "
|
||||
<< NewRC->getName() << ".\n");
|
||||
// Allow the coalescer to try again in case either side gets coalesced to
|
||||
// a physical register that's compatible with the other side. e.g.
|
||||
// r1024 = MOV32to32_ r1025
|
||||
// But later r1024 is assigned EAX then r1025 may be coalesced with EAX.
|
||||
Again = true; // May be possible to coalesce later.
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -179,8 +179,11 @@ namespace llvm {
|
||||
|
||||
/// isWinToJoinCrossClass - Return true if it's profitable to coalesce
|
||||
/// two virtual registers from different register classes.
|
||||
bool isWinToJoinCrossClass(unsigned LargeReg, unsigned SmallReg,
|
||||
unsigned Threshold);
|
||||
bool isWinToJoinCrossClass(unsigned SrcReg,
|
||||
unsigned DstReg,
|
||||
const TargetRegisterClass *SrcRC,
|
||||
const TargetRegisterClass *DstRC,
|
||||
const TargetRegisterClass *NewRC);
|
||||
|
||||
/// HasIncompatibleSubRegDefUse - If we are trying to coalesce a virtual
|
||||
/// register with a physical register, check if any of the virtual register
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep vmov.f32 | count 3
|
||||
; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep vmov.f32 | count 1
|
||||
|
||||
define arm_apcscc void @fht(float* nocapture %fz, i16 signext %n) nounwind {
|
||||
entry:
|
||||
|
@ -1,4 +1,5 @@
|
||||
; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin | grep {andl.*7.*edi}
|
||||
; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin -stats |& grep asm-printer | grep 40
|
||||
; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin | FileCheck %s
|
||||
|
||||
%struct.XXDActiveTextureTargets = type { i64, i64, i64, i64, i64, i64 }
|
||||
%struct.XXDAlphaTest = type { float, i16, i8, i8 }
|
||||
@ -61,11 +62,15 @@
|
||||
|
||||
define void @t(%struct.XXDState* %gldst, <4 x float>* %prgrm, <4 x float>** %buffs, %struct._XXVMConstants* %cnstn, %struct.YYToken* %pstrm, %struct.XXVMVPContext* %vmctx, %struct.XXVMTextures* %txtrs, %struct.XXVMVPStack* %vpstk, <4 x float>* %atr0, <4 x float>* %atr1, <4 x float>* %atr2, <4 x float>* %atr3, <4 x float>* %vtx0, <4 x float>* %vtx1, <4 x float>* %vtx2, <4 x float>* %vtx3, [4 x <4 x float>]* %tmpGbl, i32* %oldMsk, <4 x i32>* %adrGbl, i64 %key_token) nounwind {
|
||||
entry:
|
||||
; CHECK: t:
|
||||
; CHECK: xorl %ecx, %ecx
|
||||
%0 = trunc i64 %key_token to i32 ; <i32> [#uses=1]
|
||||
%1 = getelementptr %struct.YYToken* %pstrm, i32 %0 ; <%struct.YYToken*> [#uses=5]
|
||||
br label %bb1132
|
||||
|
||||
bb51: ; preds = %bb1132
|
||||
; CHECK: .align 4
|
||||
; CHECK: andl $7
|
||||
%2 = getelementptr %struct.YYToken* %1, i32 %operation.0.rec, i32 0, i32 0 ; <i16*> [#uses=1]
|
||||
%3 = load i16* %2, align 1 ; <i16> [#uses=3]
|
||||
%4 = lshr i16 %3, 6 ; <i16> [#uses=1]
|
||||
|
@ -149,7 +149,6 @@ entry:
|
||||
|
||||
bb.nph: ; preds = %entry
|
||||
; X86-64: movq _map_4_to_16@GOTPCREL(%rip)
|
||||
; X86-64: movq _map_4_to_16@GOTPCREL(%rip)
|
||||
; X86-64: .align 4
|
||||
%tmp5 = zext i32 undef to i64 ; <i64> [#uses=1]
|
||||
%tmp6 = add i64 %tmp5, 1 ; <i64> [#uses=1]
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t
|
||||
; RUN: grep asm-printer %t | grep 156
|
||||
; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 4
|
||||
; RUN: grep asm-printer %t | grep 166
|
||||
; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 5
|
||||
|
||||
type { [62 x %struct.Bitvec*] } ; type %0
|
||||
type { i8* } ; type %1
|
||||
|
Loading…
Reference in New Issue
Block a user