diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h
index 8ddcac70599..351217ce583 100644
--- a/include/llvm/CodeGen/LiveIntervalAnalysis.h
+++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h
@@ -111,6 +111,12 @@ namespace llvm {
     double getScaledIntervalSize(LiveInterval& I) {
       return (1000.0 * I.getSize()) / indexes_->getIndexesLength();
     }
+
+    /// getFuncInstructionCount - Return the number of instructions in the
+    /// current function.
+    unsigned getFuncInstructionCount() {
+      return indexes_->getFunctionSize();
+    }

     /// getApproximateInstructionCount - computes an estimate of the number
     /// of instructions in a given LiveInterval.
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index 1f9d726143a..78011ed67f8 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -1168,20 +1168,44 @@ SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI,
 /// isWinToJoinCrossClass - Return true if it's profitable to coalesce
 /// two virtual registers from different register classes.
 bool
-SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned LargeReg,
-                                                unsigned SmallReg,
-                                                unsigned Threshold) {
-  // Then make sure the intervals are *short*.
-  LiveInterval &LargeInt = li_->getInterval(LargeReg);
-  LiveInterval &SmallInt = li_->getInterval(SmallReg);
-  unsigned LargeSize = li_->getApproximateInstructionCount(LargeInt);
-  unsigned SmallSize = li_->getApproximateInstructionCount(SmallInt);
-  if (LargeSize > Threshold) {
-    unsigned SmallUses = std::distance(mri_->use_nodbg_begin(SmallReg),
-                                       mri_->use_nodbg_end());
-    unsigned LargeUses = std::distance(mri_->use_nodbg_begin(LargeReg),
-                                       mri_->use_nodbg_end());
-    if (SmallUses*LargeSize < LargeUses*SmallSize)
+SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned SrcReg,
+                                                unsigned DstReg,
+                                                const TargetRegisterClass *SrcRC,
+                                                const TargetRegisterClass *DstRC,
+                                                const TargetRegisterClass *NewRC) {
+  unsigned NewRCCount = allocatableRCRegs_[NewRC].count();
+  // This heuristic is good enough in practice, but it's obviously not *right*.
+  // 4 is a magic number that works well enough for x86, ARM, etc. It filters
+  // out all but the most restrictive register classes.
+  if (NewRCCount > 4 ||
+      // Early exit if the function is fairly small; coalesce aggressively if
+      // that's the case. For really special register classes with 3 or
+      // fewer registers, be a bit more careful.
+      (li_->getFuncInstructionCount() / NewRCCount) < 8)
+    return true;
+  LiveInterval &SrcInt = li_->getInterval(SrcReg);
+  LiveInterval &DstInt = li_->getInterval(DstReg);
+  unsigned SrcSize = li_->getApproximateInstructionCount(SrcInt);
+  unsigned DstSize = li_->getApproximateInstructionCount(DstInt);
+  if (SrcSize <= NewRCCount && DstSize <= NewRCCount)
+    return true;
+  // Estimate *register use density*. If it doubles or more, abort.
+  unsigned SrcUses = std::distance(mri_->use_nodbg_begin(SrcReg),
+                                   mri_->use_nodbg_end());
+  unsigned DstUses = std::distance(mri_->use_nodbg_begin(DstReg),
+                                   mri_->use_nodbg_end());
+  float NewDensity = ((float)(SrcUses + DstUses) / (SrcSize + DstSize)) /
+                     NewRCCount;
+  if (SrcRC != NewRC && SrcSize > NewRCCount) {
+    unsigned SrcRCCount = allocatableRCRegs_[SrcRC].count();
+    float Density = ((float)SrcUses / SrcSize) / SrcRCCount;
+    if (NewDensity > Density * 2.0f)
+      return false;
+  }
+  if (DstRC != NewRC && DstSize > NewRCCount) {
+    unsigned DstRCCount = allocatableRCRegs_[DstRC].count();
+    float Density = ((float)DstUses / DstSize) / DstRCCount;
+    if (NewDensity > Density * 2.0f)
       return false;
   }
   return true;
@@ -1517,10 +1541,11 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
       return false;  // Not coalescable
     }

-    unsigned LargeReg = isExtSubReg ? SrcReg : DstReg;
-    unsigned SmallReg = isExtSubReg ? DstReg : SrcReg;
-    unsigned Limit= allocatableRCRegs_[mri_->getRegClass(SmallReg)].count();
-    if (!isWinToJoinCrossClass(LargeReg, SmallReg, Limit)) {
+    if (!isWinToJoinCrossClass(SrcReg, DstReg, SrcRC, DstRC, NewRC)) {
+      DEBUG(dbgs() << "\tAvoid coalescing to constrained register class: "
+            << SrcRC->getName() << "/"
+            << DstRC->getName() << " -> "
+            << NewRC->getName() << ".\n");
       Again = true;  // May be possible to coalesce later.
       return false;
     }
@@ -1568,49 +1593,40 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
       }
     }

-    unsigned LargeReg = SrcReg;
-    unsigned SmallReg = DstReg;
-    // Now determine the register class of the joined register.
-    if (isExtSubReg) {
-      if (SubIdx && DstRC && DstRC->isASubClass()) {
-        // This is a move to a sub-register class. However, the source is a
-        // sub-register of a larger register class. We don't know what should
-        // the register class be. FIXME.
-        Again = true;
-        return false;
+    if (!SrcIsPhys && !DstIsPhys) {
+      if (isExtSubReg) {
+        NewRC =
+          SubIdx ? tri_->getMatchingSuperRegClass(SrcRC, DstRC, SubIdx) : SrcRC;
+      } else if (isInsSubReg) {
+        NewRC =
+          SubIdx ? tri_->getMatchingSuperRegClass(DstRC, SrcRC, SubIdx) : DstRC;
+      } else {
+        NewRC = getCommonSubClass(SrcRC, DstRC);
       }
-      if (!DstIsPhys && !SrcIsPhys)
-        NewRC = SrcRC;
-    } else if (!SrcIsPhys && !DstIsPhys) {
-      NewRC = getCommonSubClass(SrcRC, DstRC);
+
      if (!NewRC) {
        DEBUG(dbgs() << "\tDisjoint regclasses: "
              << SrcRC->getName() << ", "
              << DstRC->getName() << ".\n");
        return false;           // Not coalescable.
      }
-      if (DstRC->getSize() > SrcRC->getSize())
-        std::swap(LargeReg, SmallReg);
-    }
-    // If we are joining two virtual registers and the resulting register
-    // class is more restrictive (fewer register, smaller size). Check if it's
-    // worth doing the merge.
-    if (!SrcIsPhys && !DstIsPhys &&
-        (isExtSubReg || DstRC->isASubClass()) &&
-        !isWinToJoinCrossClass(LargeReg, SmallReg,
-                               allocatableRCRegs_[NewRC].count())) {
-      DEBUG(dbgs() << "\tSrc/Dest are different register classes: "
-            << SrcRC->getName() << "/"
-            << DstRC->getName() << " -> "
-            << NewRC->getName() << ".\n");
-      // Allow the coalescer to try again in case either side gets coalesced to
-      // a physical register that's compatible with the other side. e.g.
-      // r1024 = MOV32to32_ r1025
-      // But later r1024 is assigned EAX then r1025 may be coalesced with EAX.
-      Again = true;  // May be possible to coalesce later.
-      return false;
+      // If we are joining two virtual registers and the resulting register
+      // class is more restrictive (fewer registers, smaller size). Check if it's
+      // worth doing the merge.
+      if (!isWinToJoinCrossClass(SrcReg, DstReg, SrcRC, DstRC, NewRC)) {
+        DEBUG(dbgs() << "\tAvoid coalescing to constrained register class: "
+              << SrcRC->getName() << "/"
+              << DstRC->getName() << " -> "
+              << NewRC->getName() << ".\n");
+        // Allow the coalescer to try again in case either side gets coalesced to
+        // a physical register that's compatible with the other side. e.g.
+        // r1024 = MOV32to32_ r1025
+        // But later r1024 is assigned EAX then r1025 may be coalesced with EAX.
+        Again = true;  // May be possible to coalesce later.
+        return false;
+      }
     }
   }
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h
index f668064ab08..bd2d0bb90ab 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.h
+++ b/lib/CodeGen/SimpleRegisterCoalescing.h
@@ -179,8 +179,11 @@ namespace llvm {

     /// isWinToJoinCrossClass - Return true if it's profitable to coalesce
     /// two virtual registers from different register classes.
-    bool isWinToJoinCrossClass(unsigned LargeReg, unsigned SmallReg,
-                               unsigned Threshold);
+    bool isWinToJoinCrossClass(unsigned SrcReg,
+                               unsigned DstReg,
+                               const TargetRegisterClass *SrcRC,
+                               const TargetRegisterClass *DstRC,
+                               const TargetRegisterClass *NewRC);

     /// HasIncompatibleSubRegDefUse - If we are trying to coalesce a virtual
     /// register with a physical register, check if any of the virtual register
diff --git a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
index 2b209319792..6c453499a43 100644
--- a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
+++ b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep vmov.f32 | count 3
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep vmov.f32 | count 1

 define arm_apcscc void @fht(float* nocapture %fz, i16 signext %n) nounwind {
 entry:
diff --git a/test/CodeGen/X86/2008-10-16-SpillerBug.ll b/test/CodeGen/X86/2008-10-16-SpillerBug.ll
index b8ca364d179..f811230ce43 100644
--- a/test/CodeGen/X86/2008-10-16-SpillerBug.ll
+++ b/test/CodeGen/X86/2008-10-16-SpillerBug.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin | grep {andl.*7.*edi}
+; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin -stats |& grep asm-printer | grep 40
+; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin | FileCheck %s

 %struct.XXDActiveTextureTargets = type { i64, i64, i64, i64, i64, i64 }
 %struct.XXDAlphaTest = type { float, i16, i8, i8 }
@@ -61,11 +62,15 @@

 define void @t(%struct.XXDState* %gldst, <4 x float>* %prgrm, <4 x float>** %buffs, %struct._XXVMConstants* %cnstn, %struct.YYToken* %pstrm, %struct.XXVMVPContext* %vmctx, %struct.XXVMTextures* %txtrs, %struct.XXVMVPStack* %vpstk, <4 x float>* %atr0, <4 x float>* %atr1, <4 x float>* %atr2, <4 x float>* %atr3, <4 x float>* %vtx0, <4 x float>* %vtx1, <4 x float>* %vtx2, <4 x float>* %vtx3, [4 x <4 x float>]* %tmpGbl, i32* %oldMsk, <4 x i32>* %adrGbl, i64 %key_token) nounwind {
 entry:
+; CHECK: t:
+; CHECK: xorl %ecx, %ecx
   %0 = trunc i64 %key_token to i32              ; <i32> [#uses=1]
   %1 = getelementptr %struct.YYToken* %pstrm, i32 %0    ; <%struct.YYToken*> [#uses=5]
   br label %bb1132

 bb51:                                           ; preds = %bb1132
+; CHECK: .align 4
+; CHECK: andl $7
   %2 = getelementptr %struct.YYToken* %1, i32 %operation.0.rec, i32 0, i32 0    ; <i16*> [#uses=1]
   %3 = load i16* %2, align 1                    ; <i16> [#uses=3]
   %4 = lshr i16 %3, 6                           ; <i16> [#uses=1]
diff --git a/test/CodeGen/X86/postra-licm.ll b/test/CodeGen/X86/postra-licm.ll
index db7a9eccbec..97cc7b4977c 100644
--- a/test/CodeGen/X86/postra-licm.ll
+++ b/test/CodeGen/X86/postra-licm.ll
@@ -149,7 +149,6 @@ entry:

 bb.nph:                                         ; preds = %entry
 ; X86-64: movq _map_4_to_16@GOTPCREL(%rip)
-; X86-64: movq _map_4_to_16@GOTPCREL(%rip)
 ; X86-64: .align 4
   %tmp5 = zext i32 undef to i64                 ; <i64> [#uses=1]
   %tmp6 = add i64 %tmp5, 1                      ; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/stack-color-with-reg.ll b/test/CodeGen/X86/stack-color-with-reg.ll
index 83f56c10bb7..001a5409640 100644
--- a/test/CodeGen/X86/stack-color-with-reg.ll
+++ b/test/CodeGen/X86/stack-color-with-reg.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t
-; RUN: grep asm-printer %t | grep 156
-; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 4
+; RUN: grep asm-printer %t | grep 166
+; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 5

   type { [62 x %struct.Bitvec*] }               ; type %0
   type { i8* }                                  ; type %1
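
Note: the heuristic this patch adds to isWinToJoinCrossClass can be read in isolation: skip the check when the target class has plenty of registers or the function is tiny, accept short intervals outright, and otherwise reject the join when the merged interval's use density (uses per instruction, scaled by the class's register count) would be at least double that of either original interval. The standalone C++ sketch below illustrates that logic under assumed inputs; the free-function signature and the sample numbers are illustrative only, not the LLVM API, and it omits the SrcRC/DstRC != NewRC guards the real code performs.

// Standalone sketch (not the LLVM API) of the cross-class profitability test.
// The parameters stand in for values the coalescer reads from LiveIntervals
// and MachineRegisterInfo.
#include <cstdio>

static bool isWinToJoinCrossClassSketch(unsigned FuncInstrCount,
                                        unsigned SrcSize, unsigned DstSize,
                                        unsigned SrcUses, unsigned DstUses,
                                        unsigned SrcRCCount,
                                        unsigned DstRCCount,
                                        unsigned NewRCCount) {
  // Only worry about the most restrictive classes (4 is the patch's magic
  // number), and coalesce aggressively in small functions.
  if (NewRCCount > 4 || (FuncInstrCount / NewRCCount) < 8)
    return true;
  // Short intervals are cheap to constrain.
  if (SrcSize <= NewRCCount && DstSize <= NewRCCount)
    return true;
  // Use density of the would-be merged interval, scaled by class size.
  float NewDensity = ((float)(SrcUses + DstUses) / (SrcSize + DstSize)) /
                     NewRCCount;
  if (SrcSize > NewRCCount) {
    float SrcDensity = ((float)SrcUses / SrcSize) / SrcRCCount;
    if (NewDensity > SrcDensity * 2.0f)   // density doubles or more: abort
      return false;
  }
  if (DstSize > NewRCCount) {
    float DstDensity = ((float)DstUses / DstSize) / DstRCCount;
    if (NewDensity > DstDensity * 2.0f)
      return false;
  }
  return true;
}

int main() {
  // Example: a 4000-instruction function, joining into a 3-register class.
  // Long, heavily used intervals from 16-register classes get rejected (0).
  printf("%d\n",
         isWinToJoinCrossClassSketch(4000, 120, 80, 90, 60, 16, 16, 3));
  return 0;
}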