mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
Add early if-conversion support to X86.
Implement the TII hooks needed by EarlyIfConversion to create cmov instructions and estimate their latency. Early if-conversion is still not enabled by default. llvm-svn: 159695
This commit is contained in:
parent
db187a51eb
commit
79846e5c9b
@ -2295,6 +2295,37 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
|
||||
}
|
||||
}
|
||||
|
||||
/// getCMovFromCond - Return a cmov(rr) opcode for the given condition and
|
||||
/// register size in bytes.
|
||||
static unsigned getCMovFromCond(X86::CondCode CC, unsigned RegBytes) {
|
||||
static const unsigned Opc[16][3] = {
|
||||
{ X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr },
|
||||
{ X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr },
|
||||
{ X86::CMOVB16rr, X86::CMOVB32rr, X86::CMOVB64rr },
|
||||
{ X86::CMOVBE16rr, X86::CMOVBE32rr, X86::CMOVBE64rr },
|
||||
{ X86::CMOVE16rr, X86::CMOVE32rr, X86::CMOVE64rr },
|
||||
{ X86::CMOVG16rr, X86::CMOVG32rr, X86::CMOVG64rr },
|
||||
{ X86::CMOVGE16rr, X86::CMOVGE32rr, X86::CMOVGE64rr },
|
||||
{ X86::CMOVL16rr, X86::CMOVL32rr, X86::CMOVL64rr },
|
||||
{ X86::CMOVLE16rr, X86::CMOVLE32rr, X86::CMOVLE64rr },
|
||||
{ X86::CMOVNE16rr, X86::CMOVNE32rr, X86::CMOVNE64rr },
|
||||
{ X86::CMOVNO16rr, X86::CMOVNO32rr, X86::CMOVNO64rr },
|
||||
{ X86::CMOVNP16rr, X86::CMOVNP32rr, X86::CMOVNP64rr },
|
||||
{ X86::CMOVNS16rr, X86::CMOVNS32rr, X86::CMOVNS64rr },
|
||||
{ X86::CMOVO16rr, X86::CMOVO32rr, X86::CMOVO64rr },
|
||||
{ X86::CMOVP16rr, X86::CMOVP32rr, X86::CMOVP64rr },
|
||||
{ X86::CMOVS16rr, X86::CMOVS32rr, X86::CMOVS64rr }
|
||||
};
|
||||
|
||||
assert(CC < 16 && "Can only handle standard cond codes");
|
||||
switch(RegBytes) {
|
||||
default: llvm_unreachable("Illegal register size!");
|
||||
case 2: return Opc[CC][0];
|
||||
case 4: return Opc[CC][1];
|
||||
case 8: return Opc[CC][2];
|
||||
}
|
||||
}
|
||||
|
||||
bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
|
||||
if (!MI->isTerminator()) return false;
|
||||
|
||||
@ -2519,6 +2550,55 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
|
||||
return Count;
|
||||
}
|
||||
|
||||
bool X86InstrInfo::
|
||||
canInsertSelect(const MachineBasicBlock &MBB,
|
||||
const SmallVectorImpl<MachineOperand> &Cond,
|
||||
unsigned TrueReg, unsigned FalseReg,
|
||||
int &CondCycles, int &TrueCycles, int &FalseCycles) const {
|
||||
// Not all subtargets have cmov instructions.
|
||||
if (!TM.getSubtarget<X86Subtarget>().hasCMov())
|
||||
return false;
|
||||
if (Cond.size() != 1)
|
||||
return false;
|
||||
// We cannot do the composite conditions, at least not in SSA form.
|
||||
if ((X86::CondCode)Cond[0].getImm() > X86::COND_S)
|
||||
return false;
|
||||
|
||||
// Check register classes.
|
||||
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
|
||||
const TargetRegisterClass *RC =
|
||||
RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
|
||||
if (!RC)
|
||||
return false;
|
||||
|
||||
// We have cmov instructions for 16, 32, and 64 bit general purpose registers.
|
||||
if (X86::GR16RegClass.hasSubClassEq(RC) ||
|
||||
X86::GR32RegClass.hasSubClassEq(RC) ||
|
||||
X86::GR64RegClass.hasSubClassEq(RC)) {
|
||||
// This latency applies to Pentium M, Merom, Wolfdale, Nehalem, and Sandy
|
||||
// Bridge. Probably Ivy Bridge as well.
|
||||
CondCycles = 2;
|
||||
TrueCycles = 2;
|
||||
FalseCycles = 2;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Can't do vectors.
|
||||
return false;
|
||||
}
|
||||
|
||||
/// insertSelect - Build a register-register cmov at insertion point I of MBB
/// that defines DstReg from FalseReg and TrueReg. The opcode is chosen from
/// the single condition code in Cond and the byte size of DstReg's register
/// class.
void X86InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator I, DebugLoc DL,
                                unsigned DstReg,
                                const SmallVectorImpl<MachineOperand> &Cond,
                                unsigned TrueReg, unsigned FalseReg) const {
  assert(Cond.size() == 1 && "Invalid Cond array");

  MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
  const X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm());
  const unsigned RegBytes = RegInfo.getRegClass(DstReg)->getSize();
  const unsigned CMovOpc = getCMovFromCond(CC, RegBytes);

  // FalseReg is added as the first source operand and TrueReg as the second.
  // NOTE(review): presumably the first source is the value kept when the
  // condition does not hold -- confirm against the CMOVrr operand layout.
  BuildMI(MBB, I, DL, get(CMovOpc), DstReg)
    .addReg(FalseReg)
    .addReg(TrueReg);
}
|
||||
|
||||
/// isHReg - Test if the given register is a physical h register.
|
||||
static bool isHReg(unsigned Reg) {
|
||||
return X86::GR8_ABCD_HRegClass.contains(Reg);
|
||||
|
@ -219,6 +219,14 @@ public:
|
||||
MachineBasicBlock *FBB,
|
||||
const SmallVectorImpl<MachineOperand> &Cond,
|
||||
DebugLoc DL) const;
|
||||
virtual bool canInsertSelect(const MachineBasicBlock&,
|
||||
const SmallVectorImpl<MachineOperand> &Cond,
|
||||
unsigned, unsigned, int&, int&, int&) const;
|
||||
/// insertSelect - Insert a select instruction at MI in MBB that writes DstReg,
/// choosing between TrueReg and FalseReg according to Cond.
virtual void insertSelect(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MI,
                          DebugLoc DL,
                          unsigned DstReg,
                          const SmallVectorImpl<MachineOperand> &Cond,
                          unsigned TrueReg,
                          unsigned FalseReg) const;
|
||||
virtual void copyPhysReg(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI, DebugLoc DL,
|
||||
unsigned DestReg, unsigned SrcReg,
|
||||
|
@ -140,7 +140,12 @@ public:
|
||||
} // namespace
|
||||
|
||||
/// createPassConfig - Create the X86 pass configuration for PM.
///
/// The early if-converter pass is enabled on the returned configuration only
/// when the subtarget reports cmov support.
TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) {
  // The configuration must be built first and returned last: an
  // unconditional return ahead of the cmov check would leave the enablePass
  // call below unreachable.
  X86PassConfig *PC = new X86PassConfig(this, PM);

  if (Subtarget.hasCMov())
    PC->enablePass(&EarlyIfConverterID);

  return PC;
}
|
||||
|
||||
bool X86PassConfig::addInstSelector() {
|
||||
|
39
test/CodeGen/X86/early-ifcvt.ll
Normal file
39
test/CodeGen/X86/early-ifcvt.ll
Normal file
@ -0,0 +1,39 @@
|
||||
; Regression test for early if-conversion on x86-64: the command below runs
; llc with both early-ifcvt options and verifies the output with FileCheck.
; RUN: llc < %s -enable-early-ifcvt -stress-early-ifcvt | FileCheck %s
target triple = "x86_64-apple-macosx10.8.0"

; @mm2 is a do-while loop over the i32 array at %p that keeps a running
; maximum and minimum (both starting at 0), counts %n down to zero, and
; returns max - min.
; CHECK: mm2
define i32 @mm2(i32* nocapture %p, i32 %n) nounwind uwtable readonly ssp {
entry:
  br label %do.body

; CHECK: do.body
; Loop body has no branches before the backedge.
; CHECK-NOT: LBB
do.body:
  ; Loop-carried state: current max, current min, remaining count, cursor.
  %max.0 = phi i32 [ 0, %entry ], [ %max.1, %do.cond ]
  %min.0 = phi i32 [ 0, %entry ], [ %min.1, %do.cond ]
  %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.cond ]
  %p.addr.0 = phi i32* [ %p, %entry ], [ %incdec.ptr, %do.cond ]
  %incdec.ptr = getelementptr inbounds i32* %p.addr.0, i64 1
  %0 = load i32* %p.addr.0, align 4
  ; A new maximum skips the minimum update and goes straight to do.cond.
  %cmp = icmp sgt i32 %0, %max.0
  br i1 %cmp, label %do.cond, label %if.else

if.else:
  ; Not a new maximum: take the loaded value as the minimum if it is smaller.
  %cmp1 = icmp slt i32 %0, %min.0
  %.min.0 = select i1 %cmp1, i32 %0, i32 %min.0
  br label %do.cond

do.cond:
  ; Merge the updated max/min coming from either predecessor.
  %max.1 = phi i32 [ %0, %do.body ], [ %max.0, %if.else ]
  %min.1 = phi i32 [ %min.0, %do.body ], [ %.min.0, %if.else ]
; CHECK: decl %esi
; CHECK: jne LBB
  ; Backedge: decrement the counter and loop again while it is non-zero.
  %dec = add i32 %n.addr.0, -1
  %tobool = icmp eq i32 %dec, 0
  br i1 %tobool, label %do.end, label %do.body

do.end:
  %sub = sub nsw i32 %max.1, %min.1
  ret i32 %sub
}
|
Loading…
Reference in New Issue
Block a user