1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00

[GlobalISel][IRTranslator] Generate better conditional branch lowering.

This is a port of the functionality from SelectionDAG, which tries to find
a tree of conditions from compares that are then combined using OR or AND,
before using that result as the input to a branch. Instead of naively
lowering the code as is, this change converts that into a sequence of
conditional branches on the sub-expressions of the tree.

Like SelectionDAG, we re-use the case block codegen functionality from
the switch lowering utils, which causes us to generate some different code.
I've tried to mitigate the resulting differences in earlier combine patches.

Differential Revision: https://reviews.llvm.org/D86665
This commit is contained in:
Amara Emerson 2020-08-24 14:10:38 -07:00
parent a7636dc8f8
commit 6f86f1afef
6 changed files with 853 additions and 243 deletions

View File

@ -299,6 +299,27 @@ private:
bool translateBinaryOp(unsigned Opcode, const User &U,
MachineIRBuilder &MIRBuilder);
/// If the set of cases should be emitted as a series of branches, return
/// true. If we should emit this as a bunch of and/or'd together conditions,
/// return false.
bool shouldEmitAsBranches(const std::vector<SwitchCG::CaseBlock> &Cases);
/// Helper method for findMergedConditions.
/// This function emits a branch and is used at the leaves of an OR or an
/// AND operator tree.
void emitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
BranchProbability TProb,
BranchProbability FProb, bool InvertCond);
/// Used during condbr translation to find trees of conditions that can be
/// optimized.
void findMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
Instruction::BinaryOps Opc, BranchProbability TProb,
BranchProbability FProb, bool InvertCond);
/// Translate branch (br) instruction.
/// \pre \p U is a branch instruction.
bool translateBr(const User &U, MachineIRBuilder &MIRBuilder);

View File

@ -33,6 +33,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/CodeGen/SwitchLoweringUtils.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@ -49,11 +50,13 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
@ -360,28 +363,276 @@ bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
return CLI->lowerReturn(MIRBuilder, Ret, VRegs, SwiftErrorVReg);
}
bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
const BranchInst &BrInst = cast<BranchInst>(U);
unsigned Succ = 0;
if (!BrInst.isUnconditional()) {
// We want a G_BRCOND to the true BB followed by an unconditional branch.
Register Tst = getOrCreateVReg(*BrInst.getCondition());
const BasicBlock &TrueTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ++));
MachineBasicBlock &TrueBB = getMBB(TrueTgt);
MIRBuilder.buildBrCond(Tst, TrueBB);
/// Helper method for findMergedConditions.
/// This function emits a branch and is used at the leaves of an OR or an
/// AND operator tree.  If \p Cond is an integer or FP compare, its predicate
/// (inverted when \p InvertCond is set) is recorded directly in a
/// SwitchCG::CaseBlock; otherwise \p Cond is compared for equality with
/// `true` (ICMP_NE when inverted).  The CaseBlock is queued on
/// SL->SwitchCases for later lowering by emitSwitchCase.
void IRTranslator::emitBranchForMergedCondition(
    const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB,
    BranchProbability TProb, BranchProbability FProb, bool InvertCond) {
  // If the leaf of the tree is a comparison, merge the condition into
  // the caseblock.
  if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
    CmpInst::Predicate Condition;
    if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
      Condition = InvertCond ? IC->getInversePredicate() : IC->getPredicate();
    } else {
      const FCmpInst *FC = cast<FCmpInst>(Cond);
      Condition = InvertCond ? FC->getInversePredicate() : FC->getPredicate();
    }

    SwitchCG::CaseBlock CB(Condition, false, BOp->getOperand(0),
                           BOp->getOperand(1), nullptr, TBB, FBB, CurBB,
                           CurBuilder->getDebugLoc(), TProb, FProb);
    SL->SwitchCases.push_back(CB);
    return;
  }

  // NOTE(review): the three lines below are diff-extraction residue from the
  // *old* translateBr implementation (they reference BrInst/MIRBuilder, which
  // are not in scope here) — they are not part of this function upstream.
  const BasicBlock &BrTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ));
  MachineBasicBlock &TgtBB = getMBB(BrTgt);
  MachineBasicBlock &CurBB = MIRBuilder.getMBB();

  // Create a CaseBlock record representing this branch: test Cond against the
  // constant `true` (or `!= true` when InvertCond is set).
  CmpInst::Predicate Pred = InvertCond ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
  SwitchCG::CaseBlock CB(
      Pred, false, Cond, ConstantInt::getTrue(MF->getFunction().getContext()),
      nullptr, TBB, FBB, CurBB, CurBuilder->getDebugLoc(), TProb, FProb);
  SL->SwitchCases.push_back(CB);
}
// If the unconditional target is the layout successor, fallthrough.
if (!CurBB.isLayoutSuccessor(&TgtBB))
MIRBuilder.buildBr(TgtBB);
static bool isValInBlock(const Value *V, const BasicBlock *BB) {
if (const Instruction *I = dyn_cast<Instruction>(V))
return I->getParent() == BB;
return true;
}
// Link successors.
for (const BasicBlock *Succ : successors(&BrInst))
CurBB.addSuccessor(&getMBB(*Succ));
/// Used during condbr translation to find trees of conditions that can be
/// optimized: recursively walks an and/or tree rooted at \p Cond and lowers
/// it as a cascade of conditional branches, creating one intermediate
/// MachineBasicBlock (TmpBB) per internal tree node.  Leaves are handed to
/// emitBranchForMergedCondition, which queues SwitchCG::CaseBlocks.
///
/// \p TBB / \p FBB are the ultimate true/false destinations, \p CurBB is the
/// block being filled, \p SwitchBB is the block the original branch lived in,
/// \p Opc is the logical opcode being matched (And or Or), \p TProb / \p FProb
/// are the probabilities of reaching TBB / FBB, and \p InvertCond requests
/// that leaf predicates be inverted (set when a `not` was skipped over).
void IRTranslator::findMergedConditions(
    const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB,
    Instruction::BinaryOps Opc, BranchProbability TProb,
    BranchProbability FProb, bool InvertCond) {
  using namespace PatternMatch;
  assert((Opc == Instruction::And || Opc == Instruction::Or) &&
         "Expected Opc to be AND/OR");
  // Skip over not part of the tree and remember to invert op and operands at
  // next level.
  Value *NotCond;
  if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) &&
      isValInBlock(NotCond, CurBB->getBasicBlock())) {
    findMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
                         !InvertCond);
    return;
  }

  const Instruction *BOp = dyn_cast<Instruction>(Cond);
  // Compute the effective opcode for Cond, taking into account whether it needs
  // to be inverted, e.g.
  //   and (not (or A, B)), C
  // gets lowered as
  //   and (and (not A, not B), C)
  unsigned BOpc = 0;
  if (BOp) {
    BOpc = BOp->getOpcode();
    if (InvertCond) {
      if (BOpc == Instruction::And)
        BOpc = Instruction::Or;
      else if (BOpc == Instruction::Or)
        BOpc = Instruction::And;
    }
  }

  // If this node is not part of the or/and tree, emit it as a branch.
  // Leaf criteria: not a binop/cmp, opcode differs from the tree's opcode,
  // multiple uses, or operands defined outside the current block.
  if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
      BOpc != static_cast<unsigned>(Opc) || !BOp->hasOneUse() ||
      BOp->getParent() != CurBB->getBasicBlock() ||
      !isValInBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
      !isValInBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
    emitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, TProb, FProb,
                                 InvertCond);
    return;
  }

  // Create TmpBB after CurBB.
  MachineFunction::iterator BBI(CurBB);
  MachineBasicBlock *TmpBB =
      MF->CreateMachineBasicBlock(CurBB->getBasicBlock());
  CurBB->getParent()->insert(++BBI, TmpBB);

  if (Opc == Instruction::Or) {
    // Codegen X | Y as:
    // BB1:
    //   jmp_if_X TBB
    //   jmp TmpBB
    // TmpBB:
    //   jmp_if_Y TBB
    //   jmp FBB
    //
    // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
    // The requirement is that
    //   TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
    //   = TrueProb for original BB.
    // Assuming the original probabilities are A and B, one choice is to set
    // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
    // A/(1+B) and 2B/(1+B). This choice assumes that
    //   TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
    // Another choice is to assume TrueProb for BB1 equals to TrueProb for
    // TmpBB, but the math is more complicated.
    auto NewTrueProb = TProb / 2;
    auto NewFalseProb = TProb / 2 + FProb;
    // Emit the LHS condition.
    findMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc,
                         NewTrueProb, NewFalseProb, InvertCond);
    // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
    SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
    BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
    // Emit the RHS condition into TmpBB.
    findMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
                         Probs[0], Probs[1], InvertCond);
  } else {
    assert(Opc == Instruction::And && "Unknown merge op!");
    // Codegen X & Y as:
    // BB1:
    //   jmp_if_X TmpBB
    //   jmp FBB
    // TmpBB:
    //   jmp_if_Y TBB
    //   jmp FBB
    //
    // This requires creation of TmpBB after CurBB.
    //
    // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
    // The requirement is that
    //   FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
    //   = FalseProb for original BB.
    // Assuming the original probabilities are A and B, one choice is to set
    // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
    // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
    // TrueProb for BB1 * FalseProb for TmpBB.
    auto NewTrueProb = TProb + FProb / 2;
    auto NewFalseProb = FProb / 2;
    // Emit the LHS condition.
    findMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc,
                         NewTrueProb, NewFalseProb, InvertCond);
    // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
    SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
    BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
    // Emit the RHS condition into TmpBB.
    findMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
                         Probs[0], Probs[1], InvertCond);
  }
}
/// If the set of cases should be emitted as a series of branches, return
/// true.  If we should instead emit this as a bunch of and/or'd together
/// conditions (a single combined compare), return false.
bool IRTranslator::shouldEmitAsBranches(
    const std::vector<SwitchCG::CaseBlock> &Cases) {
  // Only a pair of cases can ever be profitably folded back into a single
  // logical op; anything else is emitted as branches.
  if (Cases.size() != 2)
    return true;

  const SwitchCG::CaseBlock &A = Cases[0];
  const SwitchCG::CaseBlock &B = Cases[1];

  // Two comparisons of the same values (in either operand order) will get
  // folded into a single comparison, so don't emit two blocks.
  const bool SameOperands = (A.CmpLHS == B.CmpLHS && A.CmpRHS == B.CmpRHS) ||
                            (A.CmpRHS == B.CmpLHS && A.CmpLHS == B.CmpRHS);
  if (SameOperands)
    return false;

  // Handle: (X != null) | (Y != null) --> (X|Y) != 0
  // Handle: (X == null) & (Y == null) --> (X|Y) == 0
  if (A.CmpRHS == B.CmpRHS && A.PredInfo.Pred == B.PredInfo.Pred &&
      isa<Constant>(A.CmpRHS) && cast<Constant>(A.CmpRHS)->isNullValue()) {
    if (A.PredInfo.Pred == CmpInst::ICMP_EQ && A.TrueBB == B.ThisBB)
      return false;
    if (A.PredInfo.Pred == CmpInst::ICMP_NE && A.FalseBB == B.ThisBB)
      return false;
  }

  return true;
}
/// Translate branch (br) instruction.
/// Unconditional branches become an (optional, fallthrough-elided) G_BR plus
/// CFG successor links.  For conditional branches whose condition is a
/// one-use and/or tree, the tree is lowered as a cascade of conditional
/// branches via findMergedConditions; all other conditions go through a
/// single SwitchCG::CaseBlock handed to emitSwitchCase.
/// \pre \p U is a branch instruction.
bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
  const BranchInst &BrInst = cast<BranchInst>(U);
  auto &CurMBB = MIRBuilder.getMBB();
  auto *Succ0MBB = &getMBB(*BrInst.getSuccessor(0));

  if (BrInst.isUnconditional()) {
    // If the unconditional target is the layout successor, fallthrough.
    if (!CurMBB.isLayoutSuccessor(Succ0MBB))
      MIRBuilder.buildBr(*Succ0MBB);

    // Link successors.
    for (const BasicBlock *Succ : successors(&BrInst))
      CurMBB.addSuccessor(&getMBB(*Succ));
    return true;
  }

  // If this condition is one of the special cases we handle, do special stuff
  // now.
  const Value *CondVal = BrInst.getCondition();
  MachineBasicBlock *Succ1MBB = &getMBB(*BrInst.getSuccessor(1));

  const auto &TLI = *MF->getSubtarget().getTargetLowering();

  // If this is a series of conditions that are or'd or and'd together, emit
  // this as a sequence of branches instead of setcc's with and/or operations.
  // As long as jumps are not expensive (exceptions for multi-use logic ops,
  // unpredictable branches, and vector extracts because those jumps are likely
  // expensive for any target), this should improve performance.
  // For example, instead of something like:
  //     cmp A, B
  //     C = seteq
  //     cmp D, E
  //     F = setle
  //     or C, F
  //     jnz foo
  // Emit:
  //     cmp A, B
  //     je foo
  //     cmp D, E
  //     jle foo
  using namespace PatternMatch;
  if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
    Instruction::BinaryOps Opcode = BOp->getOpcode();
    Value *Vec, *BOp0 = BOp->getOperand(0), *BOp1 = BOp->getOperand(1);
    // The extractelement match below rejects a condition built from two lanes
    // of the same vector, where the extra branches are likely expensive.
    if (!TLI.isJumpExpensive() && BOp->hasOneUse() &&
        !BrInst.hasMetadata(LLVMContext::MD_unpredictable) &&
        (Opcode == Instruction::And || Opcode == Instruction::Or) &&
        !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
          match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
      findMergedConditions(BOp, Succ0MBB, Succ1MBB, &CurMBB, &CurMBB, Opcode,
                           getEdgeProbability(&CurMBB, Succ0MBB),
                           getEdgeProbability(&CurMBB, Succ1MBB),
                           /*InvertCond=*/false);
      assert(SL->SwitchCases[0].ThisBB == &CurMBB && "Unexpected lowering!");

      // Allow some cases to be rejected.
      if (shouldEmitAsBranches(SL->SwitchCases)) {
        // Emit the branch for this block; the remaining CaseBlocks are
        // emitted later (finalizeBasicBlock drains SL->SwitchCases).
        emitSwitchCase(SL->SwitchCases[0], &CurMBB, *CurBuilder);
        SL->SwitchCases.erase(SL->SwitchCases.begin());
        return true;
      }

      // Okay, we decided not to do this, remove any inserted MBB's and clear
      // SwitchCases.
      for (unsigned I = 1, E = SL->SwitchCases.size(); I != E; ++I)
        MF->erase(SL->SwitchCases[I].ThisBB);

      SL->SwitchCases.clear();
    }
  }

  // Create a CaseBlock record representing this branch: compare the condition
  // value against constant `true`.
  SwitchCG::CaseBlock CB(CmpInst::ICMP_EQ, false, CondVal,
                         ConstantInt::getTrue(MF->getFunction().getContext()),
                         nullptr, Succ0MBB, Succ1MBB, &CurMBB,
                         CurBuilder->getDebugLoc());

  // Use emitSwitchCase to actually insert the fast branch sequence for this
  // cond branch.
  emitSwitchCase(CB, &CurMBB, *CurBuilder);
  return true;
}
@ -567,8 +818,23 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
const LLT i1Ty = LLT::scalar(1);
// Build the compare.
if (!CB.CmpMHS) {
Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
Cond = MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
const auto *CI = dyn_cast<ConstantInt>(CB.CmpRHS);
// For conditional branch lowering, we might try to do something silly like
// emit an G_ICMP to compare an existing G_ICMP i1 result with true. If so,
// just re-use the existing condition vreg.
if (CI && CI->getZExtValue() == 1 &&
MRI->getType(CondLHS).getSizeInBits() == 1 &&
CB.PredInfo.Pred == CmpInst::ICMP_EQ) {
Cond = CondLHS;
} else {
Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
if (CmpInst::isFPPredicate(CB.PredInfo.Pred))
Cond =
MIB.buildFCmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
else
Cond =
MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
}
} else {
assert(CB.PredInfo.Pred == CmpInst::ICMP_SLE &&
"Can only handle SLE ranges");
@ -601,17 +867,8 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
addSuccessorWithProb(CB.ThisBB, CB.FalseBB, CB.FalseProb);
CB.ThisBB->normalizeSuccProbs();
// if (SwitchBB->getBasicBlock() != CB.FalseBB->getBasicBlock())
addMachineCFGPred({SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()},
CB.ThisBB);
// If the lhs block is the next block, invert the condition so that we can
// fall through to the lhs instead of the rhs block.
if (CB.TrueBB == CB.ThisBB->getNextNode()) {
std::swap(CB.TrueBB, CB.FalseBB);
auto True = MIB.buildConstant(i1Ty, 1);
Cond = MIB.buildXor(i1Ty, Cond, True).getReg(0);
}
addMachineCFGPred({SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()},
CB.ThisBB);
MIB.buildBrCond(Cond, *CB.TrueBB);
MIB.buildBr(*CB.FalseBB);
@ -2590,6 +2847,10 @@ void IRTranslator::finalizeBasicBlock() {
emitJumpTable(JTCase.second, JTCase.second.MBB);
}
SL->JTCases.clear();
for (auto &SwCase : SL->SwitchCases)
emitSwitchCase(SwCase, &CurBuilder->getMBB(), *CurBuilder);
SL->SwitchCases.clear();
}
void IRTranslator::finalizeFunction() {

View File

@ -1313,10 +1313,8 @@ define i32 @range_test(i32 %x) {
; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[C1]]
; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ule), [[SUB]](s32), [[C5]]
; CHECK: [[C6:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[C6]]
; CHECK: G_BRCOND [[XOR]](s1), %bb.4
; CHECK: G_BR %bb.2
; CHECK: G_BRCOND [[ICMP1]](s1), %bb.2
; CHECK: G_BR %bb.4
; CHECK: bb.2.sw.bb:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: [[ADD:%[0-9]+]]:_(s32) = nsw G_ADD [[COPY]], [[C3]]

View File

@ -0,0 +1,234 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -mtriple aarch64 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck %s
declare i32 @bar(...)
define void @or_cond(i32 %X, i32 %Y, i32 %Z) nounwind {
; CHECK-LABEL: name: or_cond
; CHECK: bb.1.entry:
; CHECK: successors: %bb.2(0x40000000), %bb.4(0x40000000)
; CHECK: liveins: $w0, $w1, $w2
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]]
; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP]]
; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]]
; CHECK: G_BRCOND [[ICMP2]](s1), %bb.2
; CHECK: G_BR %bb.4
; CHECK: bb.4.entry:
; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
; CHECK: G_BRCOND [[ICMP3]](s1), %bb.2
; CHECK: G_BR %bb.3
; CHECK: bb.2.cond_true:
; CHECK: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp
; CHECK: bb.3.UnifiedReturnBlock:
; CHECK: RET_ReallyLR
entry:
%tmp1 = icmp eq i32 %X, 0
%tmp3 = icmp slt i32 %Y, 5
%tmp4 = or i1 %tmp3, %tmp1
br i1 %tmp4, label %cond_true, label %UnifiedReturnBlock
cond_true:
%tmp5 = tail call i32 (...) @bar( )
ret void
UnifiedReturnBlock:
ret void
}
define void @and_cond(i32 %X, i32 %Y, i32 %Z) nounwind {
; CHECK-LABEL: name: and_cond
; CHECK: bb.1.entry:
; CHECK: successors: %bb.4(0x40000000), %bb.3(0x40000000)
; CHECK: liveins: $w0, $w1, $w2
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]]
; CHECK: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP]]
; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]]
; CHECK: G_BRCOND [[ICMP2]](s1), %bb.4
; CHECK: G_BR %bb.3
; CHECK: bb.4.entry:
; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
; CHECK: G_BRCOND [[ICMP3]](s1), %bb.2
; CHECK: G_BR %bb.3
; CHECK: bb.2.cond_true:
; CHECK: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp
; CHECK: bb.3.UnifiedReturnBlock:
; CHECK: RET_ReallyLR
entry:
%tmp1 = icmp eq i32 %X, 0
%tmp3 = icmp slt i32 %Y, 5
%tmp4 = and i1 %tmp3, %tmp1
br i1 %tmp4, label %cond_true, label %UnifiedReturnBlock
cond_true:
%tmp5 = tail call i32 (...) @bar( )
ret void
UnifiedReturnBlock:
ret void
}
; Don't emit two branches for same operands.
define void @or_cond_same_values_cmp(i32 %X, i32 %Y, i32 %Z) nounwind {
; CHECK-LABEL: name: or_cond_same_values_cmp
; CHECK: bb.1.entry:
; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; CHECK: liveins: $w0, $w1, $w2
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s32), [[C]]
; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP]]
; CHECK: G_BRCOND [[OR]](s1), %bb.2
; CHECK: G_BR %bb.3
; CHECK: bb.2.cond_true:
; CHECK: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp
; CHECK: bb.3.UnifiedReturnBlock:
; CHECK: RET_ReallyLR
entry:
%tmp1 = icmp eq i32 %X, 5
%tmp3 = icmp slt i32 %X, 5
%tmp4 = or i1 %tmp3, %tmp1
br i1 %tmp4, label %cond_true, label %UnifiedReturnBlock
cond_true:
%tmp5 = tail call i32 (...) @bar( )
ret void
UnifiedReturnBlock:
ret void
}
; Emit multiple branches for more than 2 cases.
define void @or_cond_multiple_cases(i32 %X, i32 %Y, i32 %Z) nounwind {
; CHECK-LABEL: name: or_cond_multiple_cases
; CHECK: bb.1.entry:
; CHECK: successors: %bb.2(0x40000000), %bb.5(0x40000000)
; CHECK: liveins: $w0, $w1, $w2
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s32), [[C]]
; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP]]
; CHECK: [[OR1:%[0-9]+]]:_(s1) = G_OR [[OR]], [[ICMP2]]
; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s32), [[C]]
; CHECK: G_BRCOND [[ICMP3]](s1), %bb.2
; CHECK: G_BR %bb.5
; CHECK: bb.5.entry:
; CHECK: successors: %bb.2(0x40000000), %bb.4(0x40000000)
; CHECK: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
; CHECK: G_BRCOND [[ICMP4]](s1), %bb.2
; CHECK: G_BR %bb.4
; CHECK: bb.4.entry:
; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; CHECK: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
; CHECK: G_BRCOND [[ICMP5]](s1), %bb.2
; CHECK: G_BR %bb.3
; CHECK: bb.2.cond_true:
; CHECK: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp
; CHECK: bb.3.UnifiedReturnBlock:
; CHECK: RET_ReallyLR
entry:
%tmp1 = icmp eq i32 %X, 5
%tmp3 = icmp slt i32 %X, 5
%tmpZ = icmp eq i32 %Z, 5
%tmp4 = or i1 %tmp3, %tmp1
%final = or i1 %tmp4, %tmpZ
br i1 %final, label %cond_true, label %UnifiedReturnBlock
cond_true:
%tmp5 = tail call i32 (...) @bar( )
ret void
UnifiedReturnBlock:
ret void
}
; (X != null) | (Y != null) --> (X|Y) != 0
; Don't emit two branches.
define void @or_cond_ne_null(i32 %X, i32 %Y, i32 %Z) nounwind {
; CHECK-LABEL: name: or_cond_ne_null
; CHECK: bb.1.entry:
; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; CHECK: liveins: $w0, $w1, $w2
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]]
; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]]
; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP]]
; CHECK: G_BRCOND [[OR]](s1), %bb.2
; CHECK: G_BR %bb.3
; CHECK: bb.2.cond_true:
; CHECK: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp
; CHECK: bb.3.UnifiedReturnBlock:
; CHECK: RET_ReallyLR
entry:
%tmp1 = icmp ne i32 %X, 0
%tmp3 = icmp ne i32 %Y, 0
%tmp4 = or i1 %tmp3, %tmp1
br i1 %tmp4, label %cond_true, label %UnifiedReturnBlock
cond_true:
%tmp5 = tail call i32 (...) @bar( )
ret void
UnifiedReturnBlock:
ret void
}
; If the branch is unpredictable, don't add another branch
; regardless of whether they are expensive or not.
define void @unpredictable(i32 %X, i32 %Y, i32 %Z) nounwind {
; CHECK-LABEL: name: unpredictable
; CHECK: bb.1.entry:
; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; CHECK: liveins: $w0, $w1, $w2
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]]
; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP]]
; CHECK: G_BRCOND [[OR]](s1), %bb.2
; CHECK: G_BR %bb.3
; CHECK: bb.2.cond_true:
; CHECK: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp
; CHECK: bb.3.UnifiedReturnBlock:
; CHECK: RET_ReallyLR
entry:
%tmp1 = icmp eq i32 %X, 0
%tmp3 = icmp slt i32 %Y, 5
%tmp4 = or i1 %tmp3, %tmp1
br i1 %tmp4, label %cond_true, label %UnifiedReturnBlock, !unpredictable !0
cond_true:
%tmp5 = tail call i32 (...) @bar( )
ret void
UnifiedReturnBlock:
ret void
}
!0 = !{}

View File

@ -20,88 +20,100 @@ define void @long_chain_ambiguous_i32_in_gpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i32*
; MIPS32-NEXT: sw $7, 28($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sw $2, 24($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sw $3, 20($sp) # 4-byte Folded Spill
; MIPS32-NEXT: bnez $8, $BB0_9
; MIPS32-NEXT: bnez $8, $BB0_12
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.1: # %pre.PHI.1
; MIPS32-NEXT: # %bb.1: # %entry
; MIPS32-NEXT: j $BB0_2
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_2: # %pre.PHI.1
; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $2, $1, 1
; MIPS32-NEXT: bnez $2, $BB0_4
; MIPS32-NEXT: bnez $2, $BB0_7
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.2: # %pre.PHI.1.0
; MIPS32-NEXT: # %bb.3: # %pre.PHI.1
; MIPS32-NEXT: j $BB0_4
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_4: # %pre.PHI.1.0
; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $2, $1, 1
; MIPS32-NEXT: bnez $2, $BB0_5
; MIPS32-NEXT: bnez $2, $BB0_8
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.3: # %b.PHI.1.0
; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0
; MIPS32-NEXT: j $BB0_6
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_6: # %b.PHI.1.0
; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 0($1)
; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB0_6
; MIPS32-NEXT: j $BB0_9
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_4: # %b.PHI.1.1
; MIPS32-NEXT: $BB0_7: # %b.PHI.1.1
; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 0($1)
; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB0_6
; MIPS32-NEXT: j $BB0_9
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_5: # %b.PHI.1.2
; MIPS32-NEXT: $BB0_8: # %b.PHI.1.2
; MIPS32-NEXT: lw $1, 24($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 0($1)
; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill
; MIPS32-NEXT: $BB0_6: # %b.PHI.1
; MIPS32-NEXT: $BB0_9: # %b.PHI.1
; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 32($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $3, $2, 1
; MIPS32-NEXT: move $4, $1
; MIPS32-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sw $4, 8($sp) # 4-byte Folded Spill
; MIPS32-NEXT: bnez $3, $BB0_8
; MIPS32-NEXT: bnez $3, $BB0_11
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.7: # %b.PHI.1
; MIPS32-NEXT: j $BB0_15
; MIPS32-NEXT: # %bb.10: # %b.PHI.1
; MIPS32-NEXT: j $BB0_19
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_8: # %b.PHI.1.end
; MIPS32-NEXT: $BB0_11: # %b.PHI.1.end
; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sw $1, 0($2)
; MIPS32-NEXT: addiu $sp, $sp, 48
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_9: # %pre.PHI.2
; MIPS32-NEXT: $BB0_12: # %pre.PHI.2
; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $2, $1, 1
; MIPS32-NEXT: bnez $2, $BB0_11
; MIPS32-NEXT: bnez $2, $BB0_14
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.10: # %pre.PHI.2
; MIPS32-NEXT: j $BB0_12
; MIPS32-NEXT: # %bb.13: # %pre.PHI.2
; MIPS32-NEXT: j $BB0_15
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_11: # %b.PHI.2.0
; MIPS32-NEXT: $BB0_14: # %b.PHI.2.0
; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 0($1)
; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB0_13
; MIPS32-NEXT: j $BB0_16
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_12: # %b.PHI.2.1
; MIPS32-NEXT: $BB0_15: # %b.PHI.2.1
; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 0($1)
; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill
; MIPS32-NEXT: $BB0_13: # %b.PHI.2
; MIPS32-NEXT: $BB0_16: # %b.PHI.2
; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 36($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $3, $2, 1
; MIPS32-NEXT: move $4, $1
; MIPS32-NEXT: sw $1, 0($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sw $4, 8($sp) # 4-byte Folded Spill
; MIPS32-NEXT: bnez $3, $BB0_15
; MIPS32-NEXT: bnez $3, $BB0_19
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.14: # %b.PHI.2.end
; MIPS32-NEXT: # %bb.17: # %b.PHI.2
; MIPS32-NEXT: j $BB0_18
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_18: # %b.PHI.2.end
; MIPS32-NEXT: lw $1, 0($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sw $1, 0($2)
; MIPS32-NEXT: addiu $sp, $sp, 48
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_15: # %b.PHI.3
; MIPS32-NEXT: $BB0_19: # %b.PHI.3
; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 8($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $3, 32($sp) # 4-byte Folded Reload
@ -197,35 +209,44 @@ define void @long_chain_i32_in_gpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i32* %a, i32* %
; MIPS32-NEXT: sw $2, 32($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sw $3, 28($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sw $8, 24($sp) # 4-byte Folded Spill
; MIPS32-NEXT: bnez $9, $BB1_9
; MIPS32-NEXT: bnez $9, $BB1_12
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.1: # %pre.PHI.1
; MIPS32-NEXT: # %bb.1: # %entry
; MIPS32-NEXT: j $BB1_2
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_2: # %pre.PHI.1
; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $2, $1, 1
; MIPS32-NEXT: bnez $2, $BB1_4
; MIPS32-NEXT: bnez $2, $BB1_7
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.2: # %pre.PHI.1.0
; MIPS32-NEXT: # %bb.3: # %pre.PHI.1
; MIPS32-NEXT: j $BB1_4
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_4: # %pre.PHI.1.0
; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $2, $1, 1
; MIPS32-NEXT: bnez $2, $BB1_5
; MIPS32-NEXT: bnez $2, $BB1_8
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.3: # %b.PHI.1.0
; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0
; MIPS32-NEXT: j $BB1_6
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_6: # %b.PHI.1.0
; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 0($1)
; MIPS32-NEXT: sw $2, 20($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB1_6
; MIPS32-NEXT: j $BB1_9
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_4: # %b.PHI.1.1
; MIPS32-NEXT: $BB1_7: # %b.PHI.1.1
; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 0($1)
; MIPS32-NEXT: sw $2, 20($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB1_6
; MIPS32-NEXT: j $BB1_9
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_5: # %b.PHI.1.2
; MIPS32-NEXT: $BB1_8: # %b.PHI.1.2
; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 0($1)
; MIPS32-NEXT: sw $2, 20($sp) # 4-byte Folded Spill
; MIPS32-NEXT: $BB1_6: # %b.PHI.1
; MIPS32-NEXT: $BB1_9: # %b.PHI.1
; MIPS32-NEXT: lw $1, 20($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 40($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $3, $2, 1
@ -234,37 +255,37 @@ define void @long_chain_i32_in_gpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i32* %a, i32* %
; MIPS32-NEXT: sw $1, 16($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sw $4, 12($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sw $5, 8($sp) # 4-byte Folded Spill
; MIPS32-NEXT: bnez $3, $BB1_8
; MIPS32-NEXT: bnez $3, $BB1_11
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.7: # %b.PHI.1
; MIPS32-NEXT: j $BB1_15
; MIPS32-NEXT: # %bb.10: # %b.PHI.1
; MIPS32-NEXT: j $BB1_19
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_8: # %b.PHI.1.end
; MIPS32-NEXT: $BB1_11: # %b.PHI.1.end
; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 28($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sw $1, 0($2)
; MIPS32-NEXT: addiu $sp, $sp, 56
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_9: # %pre.PHI.2
; MIPS32-NEXT: $BB1_12: # %pre.PHI.2
; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $2, $1, 1
; MIPS32-NEXT: bnez $2, $BB1_11
; MIPS32-NEXT: bnez $2, $BB1_14
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.10: # %pre.PHI.2
; MIPS32-NEXT: j $BB1_12
; MIPS32-NEXT: # %bb.13: # %pre.PHI.2
; MIPS32-NEXT: j $BB1_15
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_11: # %b.PHI.2.0
; MIPS32-NEXT: $BB1_14: # %b.PHI.2.0
; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 0($1)
; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB1_13
; MIPS32-NEXT: j $BB1_16
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_12: # %b.PHI.2.1
; MIPS32-NEXT: $BB1_15: # %b.PHI.2.1
; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 0($1)
; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill
; MIPS32-NEXT: $BB1_13: # %b.PHI.2
; MIPS32-NEXT: $BB1_16: # %b.PHI.2
; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $3, $2, 1
@ -273,16 +294,19 @@ define void @long_chain_i32_in_gpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i32* %a, i32* %
; MIPS32-NEXT: sw $1, 0($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sw $4, 12($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sw $5, 8($sp) # 4-byte Folded Spill
; MIPS32-NEXT: bnez $3, $BB1_15
; MIPS32-NEXT: bnez $3, $BB1_19
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.14: # %b.PHI.2.end
; MIPS32-NEXT: # %bb.17: # %b.PHI.2
; MIPS32-NEXT: j $BB1_18
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_18: # %b.PHI.2.end
; MIPS32-NEXT: lw $1, 0($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 28($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sw $1, 0($2)
; MIPS32-NEXT: addiu $sp, $sp, 56
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_15: # %b.PHI.3
; MIPS32-NEXT: $BB1_19: # %b.PHI.3
; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $3, 40($sp) # 4-byte Folded Reload
@ -375,88 +399,100 @@ define void @long_chain_ambiguous_float_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, flo
; MIPS32-NEXT: sw $7, 28($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sw $2, 24($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sw $3, 20($sp) # 4-byte Folded Spill
; MIPS32-NEXT: bnez $8, $BB2_9
; MIPS32-NEXT: bnez $8, $BB2_12
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.1: # %pre.PHI.1
; MIPS32-NEXT: # %bb.1: # %entry
; MIPS32-NEXT: j $BB2_2
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_2: # %pre.PHI.1
; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $2, $1, 1
; MIPS32-NEXT: bnez $2, $BB2_4
; MIPS32-NEXT: bnez $2, $BB2_7
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.2: # %pre.PHI.1.0
; MIPS32-NEXT: # %bb.3: # %pre.PHI.1
; MIPS32-NEXT: j $BB2_4
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_4: # %pre.PHI.1.0
; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $2, $1, 1
; MIPS32-NEXT: bnez $2, $BB2_5
; MIPS32-NEXT: bnez $2, $BB2_8
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.3: # %b.PHI.1.0
; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0
; MIPS32-NEXT: j $BB2_6
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_6: # %b.PHI.1.0
; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 0($1)
; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB2_6
; MIPS32-NEXT: j $BB2_9
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_4: # %b.PHI.1.1
; MIPS32-NEXT: $BB2_7: # %b.PHI.1.1
; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 0($1)
; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB2_6
; MIPS32-NEXT: j $BB2_9
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_5: # %b.PHI.1.2
; MIPS32-NEXT: $BB2_8: # %b.PHI.1.2
; MIPS32-NEXT: lw $1, 24($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 0($1)
; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill
; MIPS32-NEXT: $BB2_6: # %b.PHI.1
; MIPS32-NEXT: $BB2_9: # %b.PHI.1
; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 32($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $3, $2, 1
; MIPS32-NEXT: move $4, $1
; MIPS32-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sw $4, 8($sp) # 4-byte Folded Spill
; MIPS32-NEXT: bnez $3, $BB2_8
; MIPS32-NEXT: bnez $3, $BB2_11
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.7: # %b.PHI.1
; MIPS32-NEXT: j $BB2_15
; MIPS32-NEXT: # %bb.10: # %b.PHI.1
; MIPS32-NEXT: j $BB2_19
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_8: # %b.PHI.1.end
; MIPS32-NEXT: $BB2_11: # %b.PHI.1.end
; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sw $1, 0($2)
; MIPS32-NEXT: addiu $sp, $sp, 48
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_9: # %pre.PHI.2
; MIPS32-NEXT: $BB2_12: # %pre.PHI.2
; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $2, $1, 1
; MIPS32-NEXT: bnez $2, $BB2_11
; MIPS32-NEXT: bnez $2, $BB2_14
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.10: # %pre.PHI.2
; MIPS32-NEXT: j $BB2_12
; MIPS32-NEXT: # %bb.13: # %pre.PHI.2
; MIPS32-NEXT: j $BB2_15
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_11: # %b.PHI.2.0
; MIPS32-NEXT: $BB2_14: # %b.PHI.2.0
; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 0($1)
; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB2_13
; MIPS32-NEXT: j $BB2_16
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_12: # %b.PHI.2.1
; MIPS32-NEXT: $BB2_15: # %b.PHI.2.1
; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 0($1)
; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill
; MIPS32-NEXT: $BB2_13: # %b.PHI.2
; MIPS32-NEXT: $BB2_16: # %b.PHI.2
; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 36($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $3, $2, 1
; MIPS32-NEXT: move $4, $1
; MIPS32-NEXT: sw $1, 0($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sw $4, 8($sp) # 4-byte Folded Spill
; MIPS32-NEXT: bnez $3, $BB2_15
; MIPS32-NEXT: bnez $3, $BB2_19
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.14: # %b.PHI.2.end
; MIPS32-NEXT: # %bb.17: # %b.PHI.2
; MIPS32-NEXT: j $BB2_18
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_18: # %b.PHI.2.end
; MIPS32-NEXT: lw $1, 0($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sw $1, 0($2)
; MIPS32-NEXT: addiu $sp, $sp, 48
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_15: # %b.PHI.3
; MIPS32-NEXT: $BB2_19: # %b.PHI.3
; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 8($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $3, 32($sp) # 4-byte Folded Reload
@ -553,35 +589,44 @@ define void @long_chain_float_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, float* %a, fl
; MIPS32-NEXT: sw $2, 32($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sw $3, 28($sp) # 4-byte Folded Spill
; MIPS32-NEXT: swc1 $f0, 24($sp) # 4-byte Folded Spill
; MIPS32-NEXT: bnez $8, $BB3_9
; MIPS32-NEXT: bnez $8, $BB3_12
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.1: # %pre.PHI.1
; MIPS32-NEXT: # %bb.1: # %entry
; MIPS32-NEXT: j $BB3_2
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_2: # %pre.PHI.1
; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $2, $1, 1
; MIPS32-NEXT: bnez $2, $BB3_4
; MIPS32-NEXT: bnez $2, $BB3_7
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.2: # %pre.PHI.1.0
; MIPS32-NEXT: # %bb.3: # %pre.PHI.1
; MIPS32-NEXT: j $BB3_4
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_4: # %pre.PHI.1.0
; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $2, $1, 1
; MIPS32-NEXT: bnez $2, $BB3_5
; MIPS32-NEXT: bnez $2, $BB3_8
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.3: # %b.PHI.1.0
; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0
; MIPS32-NEXT: j $BB3_6
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_6: # %b.PHI.1.0
; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lwc1 $f0, 0($1)
; MIPS32-NEXT: swc1 $f0, 20($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB3_6
; MIPS32-NEXT: j $BB3_9
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_4: # %b.PHI.1.1
; MIPS32-NEXT: $BB3_7: # %b.PHI.1.1
; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lwc1 $f0, 0($1)
; MIPS32-NEXT: swc1 $f0, 20($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB3_6
; MIPS32-NEXT: j $BB3_9
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_5: # %b.PHI.1.2
; MIPS32-NEXT: $BB3_8: # %b.PHI.1.2
; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lwc1 $f0, 0($1)
; MIPS32-NEXT: swc1 $f0, 20($sp) # 4-byte Folded Spill
; MIPS32-NEXT: $BB3_6: # %b.PHI.1
; MIPS32-NEXT: $BB3_9: # %b.PHI.1
; MIPS32-NEXT: lwc1 $f0, 20($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $2, $1, 1
@ -590,37 +635,37 @@ define void @long_chain_float_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, float* %a, fl
; MIPS32-NEXT: swc1 $f0, 16($sp) # 4-byte Folded Spill
; MIPS32-NEXT: swc1 $f1, 12($sp) # 4-byte Folded Spill
; MIPS32-NEXT: swc1 $f2, 8($sp) # 4-byte Folded Spill
; MIPS32-NEXT: bnez $2, $BB3_8
; MIPS32-NEXT: bnez $2, $BB3_11
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.7: # %b.PHI.1
; MIPS32-NEXT: j $BB3_15
; MIPS32-NEXT: # %bb.10: # %b.PHI.1
; MIPS32-NEXT: j $BB3_19
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_8: # %b.PHI.1.end
; MIPS32-NEXT: $BB3_11: # %b.PHI.1.end
; MIPS32-NEXT: lwc1 $f0, 16($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload
; MIPS32-NEXT: swc1 $f0, 0($1)
; MIPS32-NEXT: addiu $sp, $sp, 56
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_9: # %pre.PHI.2
; MIPS32-NEXT: $BB3_12: # %pre.PHI.2
; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $2, $1, 1
; MIPS32-NEXT: bnez $2, $BB3_11
; MIPS32-NEXT: bnez $2, $BB3_14
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.10: # %pre.PHI.2
; MIPS32-NEXT: j $BB3_12
; MIPS32-NEXT: # %bb.13: # %pre.PHI.2
; MIPS32-NEXT: j $BB3_15
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_11: # %b.PHI.2.0
; MIPS32-NEXT: $BB3_14: # %b.PHI.2.0
; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lwc1 $f0, 0($1)
; MIPS32-NEXT: swc1 $f0, 4($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB3_13
; MIPS32-NEXT: j $BB3_16
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_12: # %b.PHI.2.1
; MIPS32-NEXT: $BB3_15: # %b.PHI.2.1
; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lwc1 $f0, 0($1)
; MIPS32-NEXT: swc1 $f0, 4($sp) # 4-byte Folded Spill
; MIPS32-NEXT: $BB3_13: # %b.PHI.2
; MIPS32-NEXT: $BB3_16: # %b.PHI.2
; MIPS32-NEXT: lwc1 $f0, 4($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $2, $1, 1
@ -629,16 +674,19 @@ define void @long_chain_float_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, float* %a, fl
; MIPS32-NEXT: swc1 $f0, 0($sp) # 4-byte Folded Spill
; MIPS32-NEXT: swc1 $f1, 12($sp) # 4-byte Folded Spill
; MIPS32-NEXT: swc1 $f2, 8($sp) # 4-byte Folded Spill
; MIPS32-NEXT: bnez $2, $BB3_15
; MIPS32-NEXT: bnez $2, $BB3_19
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.14: # %b.PHI.2.end
; MIPS32-NEXT: # %bb.17: # %b.PHI.2
; MIPS32-NEXT: j $BB3_18
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_18: # %b.PHI.2.end
; MIPS32-NEXT: lwc1 $f0, 0($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload
; MIPS32-NEXT: swc1 $f0, 0($1)
; MIPS32-NEXT: addiu $sp, $sp, 56
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_15: # %b.PHI.3
; MIPS32-NEXT: $BB3_19: # %b.PHI.3
; MIPS32-NEXT: lwc1 $f0, 8($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lwc1 $f1, 12($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload

View File

@ -20,88 +20,100 @@ define void @long_chain_ambiguous_i64_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i64*
; MIPS32-NEXT: sw $7, 52($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sw $2, 48($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sw $3, 44($sp) # 4-byte Folded Spill
; MIPS32-NEXT: bnez $8, $BB0_9
; MIPS32-NEXT: bnez $8, $BB0_12
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.1: # %pre.PHI.1
; MIPS32-NEXT: # %bb.1: # %entry
; MIPS32-NEXT: j $BB0_2
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_2: # %pre.PHI.1
; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $2, $1, 1
; MIPS32-NEXT: bnez $2, $BB0_4
; MIPS32-NEXT: bnez $2, $BB0_7
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.2: # %pre.PHI.1.0
; MIPS32-NEXT: # %bb.3: # %pre.PHI.1
; MIPS32-NEXT: j $BB0_4
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_4: # %pre.PHI.1.0
; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $2, $1, 1
; MIPS32-NEXT: bnez $2, $BB0_5
; MIPS32-NEXT: bnez $2, $BB0_8
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.3: # %b.PHI.1.0
; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0
; MIPS32-NEXT: j $BB0_6
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_6: # %b.PHI.1.0
; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 0($1)
; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill
; MIPS32-NEXT: j $BB0_6
; MIPS32-NEXT: j $BB0_9
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_4: # %b.PHI.1.1
; MIPS32-NEXT: $BB0_7: # %b.PHI.1.1
; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 0($1)
; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill
; MIPS32-NEXT: j $BB0_6
; MIPS32-NEXT: j $BB0_9
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_5: # %b.PHI.1.2
; MIPS32-NEXT: $BB0_8: # %b.PHI.1.2
; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 0($1)
; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill
; MIPS32-NEXT: $BB0_6: # %b.PHI.1
; MIPS32-NEXT: $BB0_9: # %b.PHI.1
; MIPS32-NEXT: ldc1 $f0, 32($sp) # 8-byte Folded Reload
; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $2, $1, 1
; MIPS32-NEXT: mov.d $f2, $f0
; MIPS32-NEXT: sdc1 $f0, 24($sp) # 8-byte Folded Spill
; MIPS32-NEXT: sdc1 $f2, 16($sp) # 8-byte Folded Spill
; MIPS32-NEXT: bnez $2, $BB0_8
; MIPS32-NEXT: bnez $2, $BB0_11
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.7: # %b.PHI.1
; MIPS32-NEXT: j $BB0_15
; MIPS32-NEXT: # %bb.10: # %b.PHI.1
; MIPS32-NEXT: j $BB0_19
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_8: # %b.PHI.1.end
; MIPS32-NEXT: $BB0_11: # %b.PHI.1.end
; MIPS32-NEXT: ldc1 $f0, 24($sp) # 8-byte Folded Reload
; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sdc1 $f0, 0($1)
; MIPS32-NEXT: addiu $sp, $sp, 72
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_9: # %pre.PHI.2
; MIPS32-NEXT: $BB0_12: # %pre.PHI.2
; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $2, $1, 1
; MIPS32-NEXT: bnez $2, $BB0_11
; MIPS32-NEXT: bnez $2, $BB0_14
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.10: # %pre.PHI.2
; MIPS32-NEXT: j $BB0_12
; MIPS32-NEXT: # %bb.13: # %pre.PHI.2
; MIPS32-NEXT: j $BB0_15
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_11: # %b.PHI.2.0
; MIPS32-NEXT: $BB0_14: # %b.PHI.2.0
; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 0($1)
; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill
; MIPS32-NEXT: j $BB0_13
; MIPS32-NEXT: j $BB0_16
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_12: # %b.PHI.2.1
; MIPS32-NEXT: $BB0_15: # %b.PHI.2.1
; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 0($1)
; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill
; MIPS32-NEXT: $BB0_13: # %b.PHI.2
; MIPS32-NEXT: $BB0_16: # %b.PHI.2
; MIPS32-NEXT: ldc1 $f0, 8($sp) # 8-byte Folded Reload
; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $2, $1, 1
; MIPS32-NEXT: mov.d $f2, $f0
; MIPS32-NEXT: sdc1 $f0, 0($sp) # 8-byte Folded Spill
; MIPS32-NEXT: sdc1 $f2, 16($sp) # 8-byte Folded Spill
; MIPS32-NEXT: bnez $2, $BB0_15
; MIPS32-NEXT: bnez $2, $BB0_19
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.14: # %b.PHI.2.end
; MIPS32-NEXT: # %bb.17: # %b.PHI.2
; MIPS32-NEXT: j $BB0_18
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_18: # %b.PHI.2.end
; MIPS32-NEXT: ldc1 $f0, 0($sp) # 8-byte Folded Reload
; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sdc1 $f0, 0($1)
; MIPS32-NEXT: addiu $sp, $sp, 72
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB0_15: # %b.PHI.3
; MIPS32-NEXT: $BB0_19: # %b.PHI.3
; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload
; MIPS32-NEXT: ldc1 $f2, 16($sp) # 8-byte Folded Reload
; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload
@ -197,41 +209,50 @@ define void @long_chain_i64_in_gpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i64* %a, i64* %
; MIPS32-NEXT: sw $2, 56($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sw $3, 52($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sw $8, 48($sp) # 4-byte Folded Spill
; MIPS32-NEXT: bnez $9, $BB1_9
; MIPS32-NEXT: bnez $9, $BB1_12
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.1: # %pre.PHI.1
; MIPS32-NEXT: # %bb.1: # %entry
; MIPS32-NEXT: j $BB1_2
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_2: # %pre.PHI.1
; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $2, $1, 1
; MIPS32-NEXT: bnez $2, $BB1_4
; MIPS32-NEXT: bnez $2, $BB1_7
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.2: # %pre.PHI.1.0
; MIPS32-NEXT: # %bb.3: # %pre.PHI.1
; MIPS32-NEXT: j $BB1_4
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_4: # %pre.PHI.1.0
; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $2, $1, 1
; MIPS32-NEXT: bnez $2, $BB1_5
; MIPS32-NEXT: bnez $2, $BB1_8
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.3: # %b.PHI.1.0
; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0
; MIPS32-NEXT: j $BB1_6
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_6: # %b.PHI.1.0
; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 0($1)
; MIPS32-NEXT: lw $3, 4($1)
; MIPS32-NEXT: sw $2, 44($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sw $3, 40($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB1_6
; MIPS32-NEXT: j $BB1_9
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_4: # %b.PHI.1.1
; MIPS32-NEXT: $BB1_7: # %b.PHI.1.1
; MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 0($1)
; MIPS32-NEXT: lw $3, 4($1)
; MIPS32-NEXT: sw $2, 44($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sw $3, 40($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB1_6
; MIPS32-NEXT: j $BB1_9
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_5: # %b.PHI.1.2
; MIPS32-NEXT: $BB1_8: # %b.PHI.1.2
; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 0($1)
; MIPS32-NEXT: lw $3, 4($1)
; MIPS32-NEXT: sw $2, 44($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sw $3, 40($sp) # 4-byte Folded Spill
; MIPS32-NEXT: $BB1_6: # %b.PHI.1
; MIPS32-NEXT: $BB1_9: # %b.PHI.1
; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $3, 64($sp) # 4-byte Folded Reload
@ -246,12 +267,12 @@ define void @long_chain_i64_in_gpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i64* %a, i64* %
; MIPS32-NEXT: sw $6, 24($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sw $7, 20($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sw $8, 16($sp) # 4-byte Folded Spill
; MIPS32-NEXT: bnez $4, $BB1_8
; MIPS32-NEXT: bnez $4, $BB1_11
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.7: # %b.PHI.1
; MIPS32-NEXT: j $BB1_15
; MIPS32-NEXT: # %bb.10: # %b.PHI.1
; MIPS32-NEXT: j $BB1_19
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_8: # %b.PHI.1.end
; MIPS32-NEXT: $BB1_11: # %b.PHI.1.end
; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 52($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sw $1, 0($2)
@ -260,29 +281,29 @@ define void @long_chain_i64_in_gpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i64* %a, i64* %
; MIPS32-NEXT: addiu $sp, $sp, 80
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_9: # %pre.PHI.2
; MIPS32-NEXT: $BB1_12: # %pre.PHI.2
; MIPS32-NEXT: lw $1, 72($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $2, $1, 1
; MIPS32-NEXT: bnez $2, $BB1_11
; MIPS32-NEXT: bnez $2, $BB1_14
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.10: # %pre.PHI.2
; MIPS32-NEXT: j $BB1_12
; MIPS32-NEXT: # %bb.13: # %pre.PHI.2
; MIPS32-NEXT: j $BB1_15
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_11: # %b.PHI.2.0
; MIPS32-NEXT: $BB1_14: # %b.PHI.2.0
; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 0($1)
; MIPS32-NEXT: lw $3, 4($1)
; MIPS32-NEXT: sw $2, 12($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sw $3, 8($sp) # 4-byte Folded Spill
; MIPS32-NEXT: j $BB1_13
; MIPS32-NEXT: j $BB1_16
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_12: # %b.PHI.2.1
; MIPS32-NEXT: $BB1_15: # %b.PHI.2.1
; MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 0($1)
; MIPS32-NEXT: lw $3, 4($1)
; MIPS32-NEXT: sw $2, 12($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sw $3, 8($sp) # 4-byte Folded Spill
; MIPS32-NEXT: $BB1_13: # %b.PHI.2
; MIPS32-NEXT: $BB1_16: # %b.PHI.2
; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $3, 68($sp) # 4-byte Folded Reload
@ -297,9 +318,12 @@ define void @long_chain_i64_in_gpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i64* %a, i64* %
; MIPS32-NEXT: sw $6, 24($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sw $7, 20($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sw $8, 16($sp) # 4-byte Folded Spill
; MIPS32-NEXT: bnez $4, $BB1_15
; MIPS32-NEXT: bnez $4, $BB1_19
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.14: # %b.PHI.2.end
; MIPS32-NEXT: # %bb.17: # %b.PHI.2
; MIPS32-NEXT: j $BB1_18
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_18: # %b.PHI.2.end
; MIPS32-NEXT: lw $1, 0($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 52($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sw $1, 0($2)
@ -308,7 +332,7 @@ define void @long_chain_i64_in_gpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i64* %a, i64* %
; MIPS32-NEXT: addiu $sp, $sp, 80
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB1_15: # %b.PHI.3
; MIPS32-NEXT: $BB1_19: # %b.PHI.3
; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload
; MIPS32-NEXT: lw $3, 24($sp) # 4-byte Folded Reload
@ -408,88 +432,100 @@ define void @long_chain_ambiguous_double_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, do
; MIPS32-NEXT: sw $7, 52($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sw $2, 48($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sw $3, 44($sp) # 4-byte Folded Spill
; MIPS32-NEXT: bnez $8, $BB2_9
; MIPS32-NEXT: bnez $8, $BB2_12
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.1: # %pre.PHI.1
; MIPS32-NEXT: # %bb.1: # %entry
; MIPS32-NEXT: j $BB2_2
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_2: # %pre.PHI.1
; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $2, $1, 1
; MIPS32-NEXT: bnez $2, $BB2_4
; MIPS32-NEXT: bnez $2, $BB2_7
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.2: # %pre.PHI.1.0
; MIPS32-NEXT: # %bb.3: # %pre.PHI.1
; MIPS32-NEXT: j $BB2_4
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_4: # %pre.PHI.1.0
; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $2, $1, 1
; MIPS32-NEXT: bnez $2, $BB2_5
; MIPS32-NEXT: bnez $2, $BB2_8
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.3: # %b.PHI.1.0
; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0
; MIPS32-NEXT: j $BB2_6
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_6: # %b.PHI.1.0
; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 0($1)
; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill
; MIPS32-NEXT: j $BB2_6
; MIPS32-NEXT: j $BB2_9
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_4: # %b.PHI.1.1
; MIPS32-NEXT: $BB2_7: # %b.PHI.1.1
; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 0($1)
; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill
; MIPS32-NEXT: j $BB2_6
; MIPS32-NEXT: j $BB2_9
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_5: # %b.PHI.1.2
; MIPS32-NEXT: $BB2_8: # %b.PHI.1.2
; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 0($1)
; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill
; MIPS32-NEXT: $BB2_6: # %b.PHI.1
; MIPS32-NEXT: $BB2_9: # %b.PHI.1
; MIPS32-NEXT: ldc1 $f0, 32($sp) # 8-byte Folded Reload
; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $2, $1, 1
; MIPS32-NEXT: mov.d $f2, $f0
; MIPS32-NEXT: sdc1 $f0, 24($sp) # 8-byte Folded Spill
; MIPS32-NEXT: sdc1 $f2, 16($sp) # 8-byte Folded Spill
; MIPS32-NEXT: bnez $2, $BB2_8
; MIPS32-NEXT: bnez $2, $BB2_11
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.7: # %b.PHI.1
; MIPS32-NEXT: j $BB2_15
; MIPS32-NEXT: # %bb.10: # %b.PHI.1
; MIPS32-NEXT: j $BB2_19
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_8: # %b.PHI.1.end
; MIPS32-NEXT: $BB2_11: # %b.PHI.1.end
; MIPS32-NEXT: ldc1 $f0, 24($sp) # 8-byte Folded Reload
; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sdc1 $f0, 0($1)
; MIPS32-NEXT: addiu $sp, $sp, 72
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_9: # %pre.PHI.2
; MIPS32-NEXT: $BB2_12: # %pre.PHI.2
; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $2, $1, 1
; MIPS32-NEXT: bnez $2, $BB2_11
; MIPS32-NEXT: bnez $2, $BB2_14
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.10: # %pre.PHI.2
; MIPS32-NEXT: j $BB2_12
; MIPS32-NEXT: # %bb.13: # %pre.PHI.2
; MIPS32-NEXT: j $BB2_15
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_11: # %b.PHI.2.0
; MIPS32-NEXT: $BB2_14: # %b.PHI.2.0
; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 0($1)
; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill
; MIPS32-NEXT: j $BB2_13
; MIPS32-NEXT: j $BB2_16
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_12: # %b.PHI.2.1
; MIPS32-NEXT: $BB2_15: # %b.PHI.2.1
; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 0($1)
; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill
; MIPS32-NEXT: $BB2_13: # %b.PHI.2
; MIPS32-NEXT: $BB2_16: # %b.PHI.2
; MIPS32-NEXT: ldc1 $f0, 8($sp) # 8-byte Folded Reload
; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $2, $1, 1
; MIPS32-NEXT: mov.d $f2, $f0
; MIPS32-NEXT: sdc1 $f0, 0($sp) # 8-byte Folded Spill
; MIPS32-NEXT: sdc1 $f2, 16($sp) # 8-byte Folded Spill
; MIPS32-NEXT: bnez $2, $BB2_15
; MIPS32-NEXT: bnez $2, $BB2_19
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.14: # %b.PHI.2.end
; MIPS32-NEXT: # %bb.17: # %b.PHI.2
; MIPS32-NEXT: j $BB2_18
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_18: # %b.PHI.2.end
; MIPS32-NEXT: ldc1 $f0, 0($sp) # 8-byte Folded Reload
; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sdc1 $f0, 0($1)
; MIPS32-NEXT: addiu $sp, $sp, 72
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB2_15: # %b.PHI.3
; MIPS32-NEXT: $BB2_19: # %b.PHI.3
; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload
; MIPS32-NEXT: ldc1 $f2, 16($sp) # 8-byte Folded Reload
; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload
@ -588,35 +624,44 @@ define void @long_chain_double_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, double* %a,
; MIPS32-NEXT: sw $2, 64($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sw $3, 60($sp) # 4-byte Folded Spill
; MIPS32-NEXT: sdc1 $f0, 48($sp) # 8-byte Folded Spill
; MIPS32-NEXT: bnez $8, $BB3_9
; MIPS32-NEXT: bnez $8, $BB3_12
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.1: # %pre.PHI.1
; MIPS32-NEXT: # %bb.1: # %entry
; MIPS32-NEXT: j $BB3_2
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_2: # %pre.PHI.1
; MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $2, $1, 1
; MIPS32-NEXT: bnez $2, $BB3_4
; MIPS32-NEXT: bnez $2, $BB3_7
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.2: # %pre.PHI.1.0
; MIPS32-NEXT: # %bb.3: # %pre.PHI.1
; MIPS32-NEXT: j $BB3_4
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_4: # %pre.PHI.1.0
; MIPS32-NEXT: lw $1, 72($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $2, $1, 1
; MIPS32-NEXT: bnez $2, $BB3_5
; MIPS32-NEXT: bnez $2, $BB3_8
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.3: # %b.PHI.1.0
; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0
; MIPS32-NEXT: j $BB3_6
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_6: # %b.PHI.1.0
; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 0($1)
; MIPS32-NEXT: sdc1 $f0, 40($sp) # 8-byte Folded Spill
; MIPS32-NEXT: j $BB3_6
; MIPS32-NEXT: j $BB3_9
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_4: # %b.PHI.1.1
; MIPS32-NEXT: $BB3_7: # %b.PHI.1.1
; MIPS32-NEXT: lw $1, 84($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 0($1)
; MIPS32-NEXT: sdc1 $f0, 40($sp) # 8-byte Folded Spill
; MIPS32-NEXT: j $BB3_6
; MIPS32-NEXT: j $BB3_9
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_5: # %b.PHI.1.2
; MIPS32-NEXT: $BB3_8: # %b.PHI.1.2
; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 0($1)
; MIPS32-NEXT: sdc1 $f0, 40($sp) # 8-byte Folded Spill
; MIPS32-NEXT: $BB3_6: # %b.PHI.1
; MIPS32-NEXT: $BB3_9: # %b.PHI.1
; MIPS32-NEXT: ldc1 $f0, 40($sp) # 8-byte Folded Reload
; MIPS32-NEXT: lw $1, 72($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $2, $1, 1
@ -625,37 +670,37 @@ define void @long_chain_double_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, double* %a,
; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill
; MIPS32-NEXT: sdc1 $f2, 24($sp) # 8-byte Folded Spill
; MIPS32-NEXT: sdc1 $f4, 16($sp) # 8-byte Folded Spill
; MIPS32-NEXT: bnez $2, $BB3_8
; MIPS32-NEXT: bnez $2, $BB3_11
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.7: # %b.PHI.1
; MIPS32-NEXT: j $BB3_15
; MIPS32-NEXT: # %bb.10: # %b.PHI.1
; MIPS32-NEXT: j $BB3_19
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_8: # %b.PHI.1.end
; MIPS32-NEXT: $BB3_11: # %b.PHI.1.end
; MIPS32-NEXT: ldc1 $f0, 32($sp) # 8-byte Folded Reload
; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sdc1 $f0, 0($1)
; MIPS32-NEXT: addiu $sp, $sp, 88
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_9: # %pre.PHI.2
; MIPS32-NEXT: $BB3_12: # %pre.PHI.2
; MIPS32-NEXT: lw $1, 80($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $2, $1, 1
; MIPS32-NEXT: bnez $2, $BB3_11
; MIPS32-NEXT: bnez $2, $BB3_14
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.10: # %pre.PHI.2
; MIPS32-NEXT: j $BB3_12
; MIPS32-NEXT: # %bb.13: # %pre.PHI.2
; MIPS32-NEXT: j $BB3_15
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_11: # %b.PHI.2.0
; MIPS32-NEXT: $BB3_14: # %b.PHI.2.0
; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 0($1)
; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill
; MIPS32-NEXT: j $BB3_13
; MIPS32-NEXT: j $BB3_16
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_12: # %b.PHI.2.1
; MIPS32-NEXT: $BB3_15: # %b.PHI.2.1
; MIPS32-NEXT: lw $1, 84($sp) # 4-byte Folded Reload
; MIPS32-NEXT: ldc1 $f0, 0($1)
; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill
; MIPS32-NEXT: $BB3_13: # %b.PHI.2
; MIPS32-NEXT: $BB3_16: # %b.PHI.2
; MIPS32-NEXT: ldc1 $f0, 8($sp) # 8-byte Folded Reload
; MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload
; MIPS32-NEXT: andi $2, $1, 1
@ -664,16 +709,19 @@ define void @long_chain_double_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, double* %a,
; MIPS32-NEXT: sdc1 $f0, 0($sp) # 8-byte Folded Spill
; MIPS32-NEXT: sdc1 $f2, 24($sp) # 8-byte Folded Spill
; MIPS32-NEXT: sdc1 $f4, 16($sp) # 8-byte Folded Spill
; MIPS32-NEXT: bnez $2, $BB3_15
; MIPS32-NEXT: bnez $2, $BB3_19
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.14: # %b.PHI.2.end
; MIPS32-NEXT: # %bb.17: # %b.PHI.2
; MIPS32-NEXT: j $BB3_18
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_18: # %b.PHI.2.end
; MIPS32-NEXT: ldc1 $f0, 0($sp) # 8-byte Folded Reload
; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload
; MIPS32-NEXT: sdc1 $f0, 0($1)
; MIPS32-NEXT: addiu $sp, $sp, 88
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB3_15: # %b.PHI.3
; MIPS32-NEXT: $BB3_19: # %b.PHI.3
; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload
; MIPS32-NEXT: ldc1 $f2, 24($sp) # 8-byte Folded Reload
; MIPS32-NEXT: lw $1, 72($sp) # 4-byte Folded Reload