mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
Enable cross register class coalescing.
llvm-svn: 76281
This commit is contained in:
parent
8c7d396617
commit
84f06f0ee6
@ -484,6 +484,15 @@ public:
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// getMatchingSuperRegClass - Return a subclass of the specified register
|
||||
/// class A so that each register in it has a sub-register of the
|
||||
/// specified sub-register index which is in the specified register class B.
|
||||
virtual const TargetRegisterClass *
|
||||
getMatchingSuperRegClass(const TargetRegisterClass *A,
|
||||
const TargetRegisterClass *B, unsigned Idx) const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Register Class Information
|
||||
//
|
||||
|
@ -59,7 +59,7 @@ NewHeuristic("new-coalescer-heuristic",
|
||||
static cl::opt<bool>
|
||||
CrossClassJoin("join-cross-class-copies",
|
||||
cl::desc("Coalesce cross register class copies"),
|
||||
cl::init(false), cl::Hidden);
|
||||
cl::init(true), cl::Hidden);
|
||||
|
||||
static cl::opt<bool>
|
||||
PhysJoinTweak("tweak-phys-join-heuristics",
|
||||
@ -1308,6 +1308,8 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
|
||||
|
||||
// Should be non-null only when coalescing to a sub-register class.
|
||||
bool CrossRC = false;
|
||||
const TargetRegisterClass *SrcRC= SrcIsPhys ? 0 : mri_->getRegClass(SrcReg);
|
||||
const TargetRegisterClass *DstRC= DstIsPhys ? 0 : mri_->getRegClass(DstReg);
|
||||
const TargetRegisterClass *NewRC = NULL;
|
||||
MachineBasicBlock *CopyMBB = CopyMI->getParent();
|
||||
unsigned RealDstReg = 0;
|
||||
@ -1373,6 +1375,13 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
|
||||
}
|
||||
}
|
||||
if (SubIdx) {
|
||||
if (isInsSubReg || isSubRegToReg) {
|
||||
if (!DstIsPhys && !SrcIsPhys) {
|
||||
NewRC = tri_->getMatchingSuperRegClass(DstRC, SrcRC, SubIdx);
|
||||
if (!NewRC)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
unsigned LargeReg = isExtSubReg ? SrcReg : DstReg;
|
||||
unsigned SmallReg = isExtSubReg ? DstReg : SrcReg;
|
||||
unsigned Limit= allocatableRCRegs_[mri_->getRegClass(SmallReg)].count();
|
||||
@ -1424,11 +1433,8 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
|
||||
}
|
||||
}
|
||||
|
||||
const TargetRegisterClass *SrcRC= SrcIsPhys ? 0 : mri_->getRegClass(SrcReg);
|
||||
const TargetRegisterClass *DstRC= DstIsPhys ? 0 : mri_->getRegClass(DstReg);
|
||||
unsigned LargeReg = SrcReg;
|
||||
unsigned SmallReg = DstReg;
|
||||
unsigned Limit = 0;
|
||||
|
||||
// Now determine the register class of the joined register.
|
||||
if (isExtSubReg) {
|
||||
@ -1439,7 +1445,8 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
|
||||
Again = true;
|
||||
return false;
|
||||
}
|
||||
Limit = allocatableRCRegs_[DstRC].count();
|
||||
if (!DstIsPhys && !SrcIsPhys)
|
||||
NewRC = SrcRC;
|
||||
} else if (!SrcIsPhys && !DstIsPhys) {
|
||||
NewRC = getCommonSubClass(SrcRC, DstRC);
|
||||
if (!NewRC) {
|
||||
@ -1643,11 +1650,15 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
|
||||
|
||||
// Coalescing to a virtual register that is of a sub-register class of the
|
||||
// other. Make sure the resulting register is set to the right register class.
|
||||
if (CrossRC) {
|
||||
++numCrossRCs;
|
||||
if (NewRC)
|
||||
mri_->setRegClass(DstReg, NewRC);
|
||||
}
|
||||
if (CrossRC)
|
||||
++numCrossRCs;
|
||||
|
||||
// This may happen even if it's cross-rc coalescing. e.g.
|
||||
// %reg1026<def> = SUBREG_TO_REG 0, %reg1037<kill>, 4
|
||||
// reg1026 -> GR64, reg1037 -> GR32_ABCD. The resulting register will have to
|
||||
// be allocate a register from GR64_ABCD.
|
||||
if (NewRC)
|
||||
mri_->setRegClass(DstReg, NewRC);
|
||||
|
||||
if (NewHeuristic) {
|
||||
// Add all copies that define val# in the source interval into the queue.
|
||||
|
@ -1344,6 +1344,31 @@ private:
|
||||
++NumStores;
|
||||
}
|
||||
|
||||
/// isSafeToDelete - Return true if this instruction doesn't produce any side
|
||||
/// effect and all of its defs are dead.
|
||||
static bool isSafeToDelete(MachineInstr &MI) {
|
||||
const TargetInstrDesc &TID = MI.getDesc();
|
||||
if (TID.mayLoad() || TID.mayStore() || TID.isCall() || TID.isTerminator() ||
|
||||
TID.isCall() || TID.isBarrier() || TID.isReturn() ||
|
||||
TID.hasUnmodeledSideEffects())
|
||||
return false;
|
||||
if (TID.getImplicitDefs())
|
||||
return false;
|
||||
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
|
||||
MachineOperand &MO = MI.getOperand(i);
|
||||
if (!MO.isReg() || !MO.getReg())
|
||||
continue;
|
||||
if (MO.isDef() && !MO.isDead())
|
||||
return false;
|
||||
if (MO.isUse() && MO.isKill())
|
||||
// FIXME: We can't remove kill markers or else the scavenger will assert.
|
||||
// An alternative is to add a ADD pseudo instruction to replace kill
|
||||
// markers.
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// TransferDeadness - A identity copy definition is dead and it's being
|
||||
/// removed. Find the last def or use and mark it as dead / kill.
|
||||
void TransferDeadness(MachineBasicBlock *MBB, unsigned CurDist,
|
||||
@ -1385,9 +1410,7 @@ private:
|
||||
if (LastUD->isDef()) {
|
||||
// If the instruction has no side effect, delete it and propagate
|
||||
// backward further. Otherwise, mark is dead and we are done.
|
||||
const TargetInstrDesc &TID = LastUDMI->getDesc();
|
||||
if (TID.mayStore() || TID.isCall() || TID.isTerminator() ||
|
||||
TID.hasUnmodeledSideEffects()) {
|
||||
if (!isSafeToDelete(*LastUDMI)) {
|
||||
LastUD->setIsDead();
|
||||
break;
|
||||
}
|
||||
@ -2170,7 +2193,15 @@ private:
|
||||
}
|
||||
}
|
||||
ProcessNextInst:
|
||||
DistanceMap.insert(std::make_pair(&MI, Dist++));
|
||||
// Delete dead instructions without side effects.
|
||||
if (!Erased && !BackTracked && isSafeToDelete(MI)) {
|
||||
InvalidateKills(MI, TRI, RegKills, KillOps);
|
||||
VRM.RemoveMachineInstrFromMaps(&MI);
|
||||
MBB.erase(&MI);
|
||||
Erased = true;
|
||||
}
|
||||
if (!Erased)
|
||||
DistanceMap.insert(std::make_pair(&MI, Dist++));
|
||||
if (!Erased && !BackTracked) {
|
||||
for (MachineBasicBlock::iterator II = &MI; II != NextMII; ++II)
|
||||
UpdateKills(*II, TRI, RegKills, KillOps);
|
||||
|
@ -152,6 +152,84 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
|
||||
}
|
||||
}
|
||||
|
||||
const TargetRegisterClass *
|
||||
X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
|
||||
const TargetRegisterClass *B,
|
||||
unsigned SubIdx) const {
|
||||
switch (SubIdx) {
|
||||
default: return 0;
|
||||
case 1:
|
||||
// 8-bit
|
||||
if (B == &X86::GR8RegClass) {
|
||||
if (A == &X86::GR64RegClass)
|
||||
return &X86::GR64RegClass;
|
||||
else if (A == &X86::GR32RegClass)
|
||||
return &X86::GR32RegClass;
|
||||
else if (A == &X86::GR16RegClass)
|
||||
return &X86::GR16RegClass;
|
||||
} else if (B == &X86::GR8_ABCD_LRegClass || B == &X86::GR8_ABCD_HRegClass) {
|
||||
if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass)
|
||||
return &X86::GR64_ABCDRegClass;
|
||||
else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass)
|
||||
return &X86::GR32_ABCDRegClass;
|
||||
else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass)
|
||||
return &X86::GR16_ABCDRegClass;
|
||||
} else if (B == &X86::GR8_NOREXRegClass) {
|
||||
if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass)
|
||||
return &X86::GR64_NOREXRegClass;
|
||||
else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass)
|
||||
return &X86::GR32_NOREXRegClass;
|
||||
else if (A == &X86::GR16RegClass || A == &X86::GR16_NOREXRegClass)
|
||||
return &X86::GR16_NOREXRegClass;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
// 8-bit hi
|
||||
if (B == &X86::GR8_ABCD_HRegClass) {
|
||||
if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass)
|
||||
return &X86::GR64_ABCDRegClass;
|
||||
else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass)
|
||||
return &X86::GR32_ABCDRegClass;
|
||||
else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass)
|
||||
return &X86::GR16_ABCDRegClass;
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
// 16-bit
|
||||
if (B == &X86::GR16RegClass) {
|
||||
if (A == &X86::GR64RegClass)
|
||||
return &X86::GR64RegClass;
|
||||
else if (A == &X86::GR32RegClass)
|
||||
return &X86::GR32RegClass;
|
||||
} else if (B == &X86::GR16_ABCDRegClass) {
|
||||
if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass)
|
||||
return &X86::GR64_ABCDRegClass;
|
||||
else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass)
|
||||
return &X86::GR32_ABCDRegClass;
|
||||
} else if (B == &X86::GR16_NOREXRegClass) {
|
||||
if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass)
|
||||
return &X86::GR64_NOREXRegClass;
|
||||
else if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass)
|
||||
return &X86::GR64_ABCDRegClass;
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
// 32-bit
|
||||
if (B == &X86::GR32RegClass) {
|
||||
if (A == &X86::GR64RegClass)
|
||||
return &X86::GR64RegClass;
|
||||
} else if (B == &X86::GR32_ABCDRegClass) {
|
||||
if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass)
|
||||
return &X86::GR64_ABCDRegClass;
|
||||
} else if (B == &X86::GR32_NOREXRegClass) {
|
||||
if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass)
|
||||
return &X86::GR64_NOREXRegClass;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
const TargetRegisterClass *X86RegisterInfo::getPointerRegClass() const {
|
||||
const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
|
||||
if (Subtarget->is64Bit())
|
||||
|
@ -93,6 +93,13 @@ public:
|
||||
/// Code Generation virtual methods...
|
||||
///
|
||||
|
||||
/// getMatchingSuperRegClass - Return a subclass of the specified register
|
||||
/// class A so that each register in it has a sub-register of the
|
||||
/// specified sub-register index which is in the specified register class B.
|
||||
virtual const TargetRegisterClass *
|
||||
getMatchingSuperRegClass(const TargetRegisterClass *A,
|
||||
const TargetRegisterClass *B, unsigned Idx) const;
|
||||
|
||||
/// getPointerRegClass - Returns a TargetRegisterClass used for pointer
|
||||
/// values.
|
||||
const TargetRegisterClass *getPointerRegClass() const;
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llvm-as < %s | llc | grep 328
|
||||
; RUN: llvm-as < %s | llc | grep 168
|
||||
|
||||
target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
|
||||
target triple = "s390x-linux"
|
||||
|
@ -1,4 +1,5 @@
|
||||
; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=att | grep movl | count 1
|
||||
; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=att | grep movl | count 2
|
||||
; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=att | not grep movb
|
||||
|
||||
%struct.double_int = type { i64, i64 }
|
||||
%struct.tree_common = type <{ i8, [3 x i8] }>
|
||||
@ -6,7 +7,7 @@
|
||||
%struct.tree_node = type { %struct.tree_int_cst }
|
||||
@tree_code_type = external constant [0 x i32] ; <[0 x i32]*> [#uses=1]
|
||||
|
||||
define i32 @simple_cst_equal(%struct.tree_node* %t1, %struct.tree_node* %t2) {
|
||||
define i32 @simple_cst_equal(%struct.tree_node* %t1, %struct.tree_node* %t2) nounwind {
|
||||
entry:
|
||||
%tmp2526 = bitcast %struct.tree_node* %t1 to i32* ; <i32*> [#uses=1]
|
||||
br i1 false, label %UnifiedReturnBlock, label %bb21
|
||||
|
41
test/CodeGen/X86/coalescer-cross.ll
Normal file
41
test/CodeGen/X86/coalescer-cross.ll
Normal file
@ -0,0 +1,41 @@
|
||||
; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin10 | not grep movaps
|
||||
; rdar://6509240
|
||||
|
||||
type { %struct.TValue } ; type %0
|
||||
type { %struct.L_Umaxalign, i32, %struct.Node* } ; type %1
|
||||
%struct.CallInfo = type { %struct.TValue*, %struct.TValue*, %struct.TValue*, i32*, i32, i32 }
|
||||
%struct.GCObject = type { %struct.lua_State }
|
||||
%struct.L_Umaxalign = type { double }
|
||||
%struct.Mbuffer = type { i8*, i32, i32 }
|
||||
%struct.Node = type { %struct.TValue, %struct.TKey }
|
||||
%struct.TKey = type { %1 }
|
||||
%struct.TString = type { %struct.anon }
|
||||
%struct.TValue = type { %struct.L_Umaxalign, i32 }
|
||||
%struct.Table = type { %struct.GCObject*, i8, i8, i8, i8, %struct.Table*, %struct.TValue*, %struct.Node*, %struct.Node*, %struct.GCObject*, i32 }
|
||||
%struct.UpVal = type { %struct.GCObject*, i8, i8, %struct.TValue*, %0 }
|
||||
%struct.anon = type { %struct.GCObject*, i8, i8, i8, i32, i32 }
|
||||
%struct.global_State = type { %struct.stringtable, i8* (i8*, i8*, i32, i32)*, i8*, i8, i8, i32, %struct.GCObject*, %struct.GCObject**, %struct.GCObject*, %struct.GCObject*, %struct.GCObject*, %struct.GCObject*, %struct.Mbuffer, i32, i32, i32, i32, i32, i32, i32 (%struct.lua_State*)*, %struct.TValue, %struct.lua_State*, %struct.UpVal, [9 x %struct.Table*], [17 x %struct.TString*] }
|
||||
%struct.lua_Debug = type { i32, i8*, i8*, i8*, i8*, i32, i32, i32, i32, [60 x i8], i32 }
|
||||
%struct.lua_State = type { %struct.GCObject*, i8, i8, i8, %struct.TValue*, %struct.TValue*, %struct.global_State*, %struct.CallInfo*, i32*, %struct.TValue*, %struct.TValue*, %struct.CallInfo*, %struct.CallInfo*, i32, i32, i16, i16, i8, i8, i32, i32, void (%struct.lua_State*, %struct.lua_Debug*)*, %struct.TValue, %struct.TValue, %struct.GCObject*, %struct.GCObject*, %struct.lua_longjmp*, i32 }
|
||||
%struct.lua_longjmp = type { %struct.lua_longjmp*, [18 x i32], i32 }
|
||||
%struct.stringtable = type { %struct.GCObject**, i32, i32 }
|
||||
@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (%struct.lua_State*)* @os_clock to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
|
||||
|
||||
define i32 @os_clock(%struct.lua_State* nocapture %L) nounwind ssp {
|
||||
entry:
|
||||
%0 = tail call i32 @"\01_clock$UNIX2003"() nounwind ; <i32> [#uses=1]
|
||||
%1 = uitofp i32 %0 to double ; <double> [#uses=1]
|
||||
%2 = fdiv double %1, 1.000000e+06 ; <double> [#uses=1]
|
||||
%3 = getelementptr %struct.lua_State* %L, i32 0, i32 4 ; <%struct.TValue**> [#uses=3]
|
||||
%4 = load %struct.TValue** %3, align 4 ; <%struct.TValue*> [#uses=2]
|
||||
%5 = getelementptr %struct.TValue* %4, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
|
||||
store double %2, double* %5, align 4
|
||||
%6 = getelementptr %struct.TValue* %4, i32 0, i32 1 ; <i32*> [#uses=1]
|
||||
store i32 3, i32* %6, align 4
|
||||
%7 = load %struct.TValue** %3, align 4 ; <%struct.TValue*> [#uses=1]
|
||||
%8 = getelementptr %struct.TValue* %7, i32 1 ; <%struct.TValue*> [#uses=1]
|
||||
store %struct.TValue* %8, %struct.TValue** %3, align 4
|
||||
ret i32 1
|
||||
}
|
||||
|
||||
declare i32 @"\01_clock$UNIX2003"()
|
@ -1,6 +1,7 @@
|
||||
; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t
|
||||
; RUN: grep stackcoloring %t | grep "loads eliminated"
|
||||
; RUN: grep stackcoloring %t | grep "stores eliminated"
|
||||
; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 5
|
||||
; RUN: grep asm-printer %t | grep 176
|
||||
|
||||
type { [62 x %struct.Bitvec*] } ; type %0
|
||||
type { i8* } ; type %1
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llvm-as < %s | llc | grep {movq %rdi, %rax}
|
||||
; RUN: llvm-as < %s | llc
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
||||
target triple = "x86_64-apple-darwin8"
|
||||
|
Loading…
Reference in New Issue
Block a user