From e75d14f8b0755d5237c426fc2066280c63c519db Mon Sep 17 00:00:00 2001
From: Dan Gohman
Date: Thu, 2 Oct 2008 22:15:21 +0000
Subject: [PATCH] Optimize conditional branches in X86FastISel. This replaces
 sequences like this:

       sete    %al
       testb   %al, %al
       jne     LBB11_1

with this:

       je      LBB11_1

llvm-svn: 56969
---
Review note: relative to the upstream commit this revision (1) drops the
unused NextMBB local from FastISel::FastEmitBranch, (2) restores the
"no compare opcode" bail-out in X86SelectCmp that the removed switch
provided via its default case, and (3) adds brief comments. The index
hashes below are those of the upstream commit and were not recomputed.

 include/llvm/CodeGen/FastISel.h       |   5 +
 lib/CodeGen/SelectionDAG/FastISel.cpp |  25 +++--
 lib/Target/X86/X86FastISel.cpp        | 166 +++++++++++++++++++++++---
 3 files changed, 169 insertions(+), 27 deletions(-)

diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h
index 9d33340de78..2b112c878c8 100644
--- a/include/llvm/CodeGen/FastISel.h
+++ b/include/llvm/CodeGen/FastISel.h
@@ -238,6 +238,11 @@ protected:
   /// from a specified index of a superregister.
   unsigned FastEmitInst_extractsubreg(unsigned Op0, uint32_t Idx);
 
+  /// FastEmitBranch - Emit an unconditional branch to the given block,
+  /// unless it is the immediate (fall-through) successor, and update
+  /// the CFG.
+  void FastEmitBranch(MachineBasicBlock *MBB);
+
   void UpdateValueMap(Value* I, unsigned Reg);
 
   unsigned createResultReg(const TargetRegisterClass *RC);
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 9f70bc998c9..c0e8418c4c5 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -461,6 +461,20 @@ FastISel::SelectInstruction(Instruction *I) {
   return SelectOperator(I, I->getOpcode());
 }
 
+/// FastEmitBranch - Emit an unconditional branch to the given block,
+/// unless it is the immediate (fall-through) successor, and update
+/// the CFG.
+void
+FastISel::FastEmitBranch(MachineBasicBlock *MSucc) {
+  if (MBB->isLayoutSuccessor(MSucc)) {
+    // The unconditional fall-through case, which needs no instructions.
+  } else {
+    // The unconditional branch case.
+    TII.InsertBranch(*MBB, MSucc, NULL, SmallVector<MachineOperand, 0>());
+  }
+  MBB->addSuccessor(MSucc);
+}
+
 bool
 FastISel::SelectOperator(User *I, unsigned Opcode) {
   switch (Opcode) {
@@ -508,18 +522,9 @@ FastISel::SelectOperator(User *I, unsigned Opcode) {
     BranchInst *BI = cast<BranchInst>(I);
 
     if (BI->isUnconditional()) {
-      MachineFunction::iterator NextMBB =
-         next(MachineFunction::iterator(MBB));
       BasicBlock *LLVMSucc = BI->getSuccessor(0);
       MachineBasicBlock *MSucc = MBBMap[LLVMSucc];
-
-      if (NextMBB != MF.end() && MSucc == NextMBB) {
-        // The unconditional fall-through case, which needs no instructions.
-      } else {
-        // The unconditional branch case.
-        TII.InsertBranch(*MBB, MSucc, NULL, SmallVector<MachineOperand, 0>());
-      }
-      MBB->addSuccessor(MSucc);
+      FastEmitBranch(MSucc);
       return true;
     }
 
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 3d5759001f0..68d0d85501e 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -90,6 +90,8 @@ private:
   bool X86SelectSelect(Instruction *I);
 
   bool X86SelectTrunc(Instruction *I);
+
+  unsigned X86ChooseCmpOpcode(MVT VT);
 
   bool X86SelectFPExt(Instruction *I);
   bool X86SelectFPTrunc(Instruction *I);
@@ -507,6 +509,21 @@ bool X86FastISel::X86SelectLoad(Instruction *I) {
   return false;
 }
 
+/// X86ChooseCmpOpcode - Return the register-register compare opcode for
+/// the given value type, or 0 if none is listed for that type.
+unsigned X86FastISel::X86ChooseCmpOpcode(MVT VT) {
+  switch (VT.getSimpleVT()) {
+  case MVT::i8:  return X86::CMP8rr;
+  case MVT::i16: return X86::CMP16rr;
+  case MVT::i32: return X86::CMP32rr;
+  case MVT::i64: return X86::CMP64rr;
+  case MVT::f32: return X86::UCOMISSrr;
+  case MVT::f64: return X86::UCOMISDrr;
+  default:       break;
+  }
+  return 0;
+}
+
 bool X86FastISel::X86SelectCmp(Instruction *I) {
   CmpInst *CI = cast<CmpInst>(I);
 
@@ -519,16 +536,8 @@ bool X86FastISel::X86SelectCmp(Instruction *I) {
   unsigned Op1Reg = getRegForValue(CI->getOperand(1));
   if (Op1Reg == 0) return false;
 
-  unsigned Opc;
-  switch (VT.getSimpleVT()) {
-  case MVT::i8:  Opc = X86::CMP8rr;    break;
-  case MVT::i16: Opc = X86::CMP16rr;   break;
-  case MVT::i32: Opc = X86::CMP32rr;   break;
-  case MVT::i64: Opc = X86::CMP64rr;   break;
-  case MVT::f32: Opc = X86::UCOMISSrr; break;
-  case MVT::f64: Opc = X86::UCOMISDrr; break;
-  default: return false;
-  }
+  unsigned Opc = X86ChooseCmpOpcode(VT);
+  if (Opc == 0) return false;  // No compare opcode for this type.
 
   unsigned ResultReg = createResultReg(&X86::GR8RegClass);
   switch (CI->getPredicate()) {
@@ -661,19 +670,142 @@ bool X86FastISel::X86SelectZExt(Instruction *I) {
 }
 
 bool X86FastISel::X86SelectBranch(Instruction *I) {
-  BranchInst *BI = cast<BranchInst>(I);
   // Unconditional branches are selected by tablegen-generated code.
-  unsigned OpReg = getRegForValue(BI->getCondition());
-  if (OpReg == 0) return false;
+  // Handle a conditional branch.
+  BranchInst *BI = cast<BranchInst>(I);
   MachineBasicBlock *TrueMBB = MBBMap[BI->getSuccessor(0)];
   MachineBasicBlock *FalseMBB = MBBMap[BI->getSuccessor(1)];
 
-  BuildMI(MBB, TII.get(X86::TEST8rr)).addReg(OpReg).addReg(OpReg);
-  BuildMI(MBB, TII.get(X86::JNE)).addMBB(TrueMBB);
-  BuildMI(MBB, TII.get(X86::JMP)).addMBB(FalseMBB);
+  // Fold the common case of a conditional branch with a comparison.
+  if (CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
+    if (CI->hasOneUse()) {
+      MVT VT = TLI.getValueType(CI->getOperand(0)->getType());
+      unsigned Opc = X86ChooseCmpOpcode(VT);
+      if (Opc == 0) return false;
 
+      // Try to take advantage of fallthrough opportunities.
+      CmpInst::Predicate Predicate = CI->getPredicate();
+      if (MBB->isLayoutSuccessor(TrueMBB)) {
+        std::swap(TrueMBB, FalseMBB);
+        Predicate = CmpInst::getInversePredicate(Predicate);
+      }
+
+      unsigned Op0Reg = getRegForValue(CI->getOperand(0));
+      if (Op0Reg == 0) return false;
+      unsigned Op1Reg = getRegForValue(CI->getOperand(1));
+      if (Op1Reg == 0) return false;
+
+      // Emit the compare and a single conditional jump on its flags.
+      // FCMP_OLT, FCMP_OLE, FCMP_UGT and FCMP_UGE compare with the
+      // operands swapped; unlisted predicates are left unhandled.
+      switch (Predicate) {
+      case CmpInst::FCMP_OGT:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JA)).addMBB(TrueMBB);
+        break;
+      case CmpInst::FCMP_OGE:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JAE)).addMBB(TrueMBB);
+        break;
+      case CmpInst::FCMP_OLT:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op1Reg).addReg(Op0Reg);
+        BuildMI(MBB, TII.get(X86::JA)).addMBB(TrueMBB);
+        break;
+      case CmpInst::FCMP_OLE:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op1Reg).addReg(Op0Reg);
+        BuildMI(MBB, TII.get(X86::JAE)).addMBB(TrueMBB);
+        break;
+      case CmpInst::FCMP_ONE:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JNE)).addMBB(TrueMBB);
+        break;
+      case CmpInst::FCMP_ORD:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JNP)).addMBB(TrueMBB);
+        break;
+      case CmpInst::FCMP_UNO:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JP)).addMBB(TrueMBB);
+        break;
+      case CmpInst::FCMP_UEQ:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JE)).addMBB(TrueMBB);
+        break;
+      case CmpInst::FCMP_UGT:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op1Reg).addReg(Op0Reg);
+        BuildMI(MBB, TII.get(X86::JB)).addMBB(TrueMBB);
+        break;
+      case CmpInst::FCMP_UGE:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op1Reg).addReg(Op0Reg);
+        BuildMI(MBB, TII.get(X86::JBE)).addMBB(TrueMBB);
+        break;
+      case CmpInst::FCMP_ULT:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JB)).addMBB(TrueMBB);
+        break;
+      case CmpInst::FCMP_ULE:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JBE)).addMBB(TrueMBB);
+        break;
+      case CmpInst::ICMP_EQ:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JE)).addMBB(TrueMBB);
+        break;
+      case CmpInst::ICMP_NE:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JNE)).addMBB(TrueMBB);
+        break;
+      case CmpInst::ICMP_UGT:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JA)).addMBB(TrueMBB);
+        break;
+      case CmpInst::ICMP_UGE:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JAE)).addMBB(TrueMBB);
+        break;
+      case CmpInst::ICMP_ULT:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JB)).addMBB(TrueMBB);
+        break;
+      case CmpInst::ICMP_ULE:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JBE)).addMBB(TrueMBB);
+        break;
+      case CmpInst::ICMP_SGT:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JG)).addMBB(TrueMBB);
+        break;
+      case CmpInst::ICMP_SGE:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JGE)).addMBB(TrueMBB);
+        break;
+      case CmpInst::ICMP_SLT:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JL)).addMBB(TrueMBB);
+        break;
+      case CmpInst::ICMP_SLE:
+        BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
+        BuildMI(MBB, TII.get(X86::JLE)).addMBB(TrueMBB);
+        break;
+      default:
+        return false;
+      }
+      MBB->addSuccessor(TrueMBB);
+      FastEmitBranch(FalseMBB);
+      return true;
+    }
+  }
+
+  // Otherwise do a clumsy setcc and re-test it.
+  unsigned OpReg = getRegForValue(BI->getCondition());
+  if (OpReg == 0) return false;
+
+  BuildMI(MBB, TII.get(X86::TEST8rr)).addReg(OpReg).addReg(OpReg);
+
+  BuildMI(MBB, TII.get(X86::JNE)).addMBB(TrueMBB);
   MBB->addSuccessor(TrueMBB);
-  MBB->addSuccessor(FalseMBB);
+
+  FastEmitBranch(FalseMBB);
 
   return true;
 }