diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 0120adcbb3d..434d1c7d286 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1319,172 +1319,232 @@ HoistTerminator: return true; } +// Return true if V0 and V1 are equivalent. This handles the obvious cases +// where V0 == V1 and V0 and V1 are both identical instructions, but also +// handles loads and stores with identical operands. +// +// Because determining if two memory instructions are equivalent +// depends on control flow, the \c At0 and \c At1 parameters specify a +// location for the query. This function is essentially answering the +// query "If V0 were moved to At0, and V1 were moved to At1, are V0 and V1 +// equivalent?". In practice this means checking that moving V0 to At0 +// doesn't cross any other memory instructions. +static bool areValuesTriviallySame(Value *V0, BasicBlock::const_iterator At0, + Value *V1, BasicBlock::const_iterator At1) { + if (V0 == V1) + return true; + + // Also check for instructions that are identical but not pointer-identical. + // This can include load instructions that haven't been CSE'd. + if (!isa(V0) || !isa(V1)) + return false; + const auto *I0 = cast(V0); + const auto *I1 = cast(V1); + if (!I0->isIdenticalToWhenDefined(I1)) + return false; + + if (!I0->mayReadOrWriteMemory()) + return true; + + // Instructions that may read or write memory have extra restrictions. We + // must ensure we don't treat %a and %b as equivalent in code such as: + // + // %a = load %x + // store %x, 1 + // if (%c) { + // %b = load %x + // %d = add %b, 1 + // } else { + // %d = add %a, 1 + // } + + // Be conservative. We don't want to search the entire CFG between def + // and use; if the def isn't in the same block as the use just bail. + if (I0->getParent() != At0->getParent() || + I1->getParent() != At1->getParent()) + return false; + + // Again, be super conservative. Ideally we'd be able to query AliasAnalysis + // but we currently don't have that available. + auto WritesMemory = [](const Instruction &I) { + return I.mayReadOrWriteMemory(); + }; + if (std::any_of(std::next(I0->getIterator()), At0, WritesMemory)) + return false; + if (std::any_of(std::next(I1->getIterator()), At1, WritesMemory)) + return false; + return true; +} + +// Is it legal to replace the operand \c OpIdx of \c GEP with a PHI node? +static bool canReplaceGEPOperandWithPHI(const Instruction *GEP, + unsigned OpIdx) { + if (OpIdx == 0) + return true; + gep_type_iterator It = std::next(gep_type_begin(GEP), OpIdx - 1); + return !It->isStructTy(); +} + +// All blocks in Blocks unconditionally jump to a common successor. Analyze +// the last non-terminator instruction in each block and return true if it would +// be possible to sink them into their successor, creating one common +// instruction instead. Set NumPHIsRequired to the number of PHI nodes that +// would need to be created during sinking. +static bool canSinkLastInstruction(ArrayRef Blocks, + unsigned &NumPHIsRequired) { + SmallVector Insts; + for (auto *BB : Blocks) { + if (BB->getTerminator() == &BB->front()) + // Block was empty. + return false; + Insts.push_back(BB->getTerminator()->getPrevNode()); + } + + // Prune out obviously bad instructions to move. Any non-store instruction + // must have exactly one use, and we check later that use is by a single, + // common PHI instruction in the successor. + for (auto *I : Insts) { + // These instructions may change or break semantics if moved. + if (isa(I) || I->isEHPad() || isa(I) || + I->getType()->isTokenTy()) + return false; + // Apart from loads and stores, we won't move anything that could + // change memory or have sideeffects. + if (!isa(I) && !isa(I) && + (I->mayHaveSideEffects() || I->mayHaveSideEffects())) + return false; + // Everything must have only one use too, apart from stores which + // have no uses. + if (!isa(I) && !I->hasOneUse()) + return false; + } + + const Instruction *I0 = Insts.front(); + for (auto *I : Insts) + if (!I->isSameOperationAs(I0)) + return false; + + // If this isn't a store, check the only user is a single PHI. + if (!isa(I0)) { + auto *PNUse = dyn_cast(*I0->user_begin()); + if (!PNUse || + !all_of(Insts, [&PNUse](const Instruction *I) { + return *I->user_begin() == PNUse; + })) + return false; + } + + NumPHIsRequired = 0; + for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) { + if (I0->getOperand(OI)->getType()->isTokenTy()) + // Don't touch any operand of token type. + return false; + auto SameAsI0 = [&I0, OI](const Instruction *I) { + return areValuesTriviallySame(I->getOperand(OI), I->getIterator(), + I0->getOperand(OI), I0->getIterator()); + }; + if (!all_of(Insts, SameAsI0)) { + if (isa(I0) && !canReplaceGEPOperandWithPHI(I0, OI)) + // We can't create a PHI from this GEP. + return false; + if (isa(I0) && OI == 2) + // We can't create a PHI for a shufflevector mask. + return false; + ++NumPHIsRequired; + } + } + return true; +} + +// Assuming canSinkLastInstruction(Blocks) has returned true, sink the last +// instruction of every block in Blocks to their common successor, commoning +// into one instruction. +static void sinkLastInstruction(ArrayRef Blocks) { + unsigned Dummy; + (void)Dummy; + assert(canSinkLastInstruction(Blocks, Dummy) && + "Must analyze before transforming!"); + auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0); + + // canSinkLastInstruction returning true guarantees that every block has at + // least one non-terminator instruction. + SmallVector Insts; + for (auto *BB : Blocks) + Insts.push_back(BB->getTerminator()->getPrevNode()); + + // We don't need to do any checking here; canSinkLastInstruction should have + // done it all for us. + Instruction *I0 = Insts.front(); + SmallVector NewOperands; + for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) { + // This check is different to that in canSinkLastInstruction. There, we + // cared about the global view once simplifycfg (and instcombine) have + // completed - it takes into account PHIs that become trivially + // simplifiable. However here we need a more local view; if an operand + // differs we create a PHI and rely on instcombine to clean up the very + // small mess we may make. + bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) { + return I->getOperand(O) != I0->getOperand(O); + }); + if (!NeedPHI) { + NewOperands.push_back(I0->getOperand(O)); + continue; + } + + // Create a new PHI in the successor block and populate it. + auto *Op = I0->getOperand(O); + assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!"); + auto *PN = PHINode::Create(Op->getType(), Insts.size(), + Op->getName() + ".sink", &BBEnd->front()); + for (auto *I : Insts) + PN->addIncoming(I->getOperand(O), I->getParent()); + NewOperands.push_back(PN); + } + + // Arbitrarily use I0 as the new "common" instruction; remap its operands + // and move it to the start of the successor block. + for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) + I0->getOperandUse(O).set(NewOperands[O]); + I0->moveBefore(&*BBEnd->getFirstInsertionPt()); + + if (!isa(I0)) { + // canSinkLastInstruction checked that all instructions were used by + // one and only one PHI node. Find that now, RAUW it to our common + // instruction and nuke it. + assert(I0->hasOneUse()); + auto *PN = cast(*I0->user_begin()); + PN->replaceAllUsesWith(I0); + PN->eraseFromParent(); + } + + // Finally nuke all instructions apart from the common instruction. + for (auto *I : Insts) + if (I != I0) + I->eraseFromParent(); +} + /// Given an unconditional branch that goes to BBEnd, /// check whether BBEnd has only two predecessors and the other predecessor /// ends with an unconditional branch. If it is true, sink any common code /// in the two predecessors to BBEnd. static bool SinkThenElseCodeToEnd(BranchInst *BI1) { assert(BI1->isUnconditional()); - BasicBlock *BB1 = BI1->getParent(); BasicBlock *BBEnd = BI1->getSuccessor(0); - // Check that BBEnd has two predecessors and the other predecessor ends with - // an unconditional branch. - pred_iterator PI = pred_begin(BBEnd), PE = pred_end(BBEnd); - BasicBlock *Pred0 = *PI++; - if (PI == PE) // Only one predecessor. + SmallVector Blocks; + for (auto *BB : predecessors(BBEnd)) + Blocks.push_back(BB); + if (Blocks.size() != 2 || + !all_of(Blocks, [](const BasicBlock *BB) { + auto *BI = dyn_cast(BB->getTerminator()); + return BI && BI->isUnconditional(); + })) return false; - BasicBlock *Pred1 = *PI++; - if (PI != PE) // More than two predecessors. - return false; - BasicBlock *BB2 = (Pred0 == BB1) ? Pred1 : Pred0; - BranchInst *BI2 = dyn_cast(BB2->getTerminator()); - if (!BI2 || !BI2->isUnconditional()) - return false; - - // Gather the PHI nodes in BBEnd. - SmallDenseMap, PHINode *> JointValueMap; - Instruction *FirstNonPhiInBBEnd = nullptr; - for (BasicBlock::iterator I = BBEnd->begin(), E = BBEnd->end(); I != E; ++I) { - if (PHINode *PN = dyn_cast(I)) { - Value *BB1V = PN->getIncomingValueForBlock(BB1); - Value *BB2V = PN->getIncomingValueForBlock(BB2); - JointValueMap[std::make_pair(BB1V, BB2V)] = PN; - } else { - FirstNonPhiInBBEnd = &*I; - break; - } - } - if (!FirstNonPhiInBBEnd) - return false; - - // This does very trivial matching, with limited scanning, to find identical - // instructions in the two blocks. We scan backward for obviously identical - // instructions in an identical order. - BasicBlock::InstListType::reverse_iterator RI1 = BB1->getInstList().rbegin(), - RE1 = BB1->getInstList().rend(), - RI2 = BB2->getInstList().rbegin(), - RE2 = BB2->getInstList().rend(); - // Skip debug info. - while (RI1 != RE1 && isa(&*RI1)) - ++RI1; - if (RI1 == RE1) - return false; - while (RI2 != RE2 && isa(&*RI2)) - ++RI2; - if (RI2 == RE2) - return false; - // Skip the unconditional branches. - ++RI1; - ++RI2; bool Changed = false; - while (RI1 != RE1 && RI2 != RE2) { - // Skip debug info. - while (RI1 != RE1 && isa(&*RI1)) - ++RI1; - if (RI1 == RE1) - return Changed; - while (RI2 != RE2 && isa(&*RI2)) - ++RI2; - if (RI2 == RE2) - return Changed; - - Instruction *I1 = &*RI1, *I2 = &*RI2; - auto InstPair = std::make_pair(I1, I2); - // I1 and I2 should have a single use in the same PHI node, and they - // perform the same operation. - // Cannot move control-flow-involving, volatile loads, vaarg, etc. - if (isa(I1) || isa(I2) || isa(I1) || - isa(I2) || I1->isEHPad() || I2->isEHPad() || - isa(I1) || isa(I2) || - I1->mayHaveSideEffects() || I2->mayHaveSideEffects() || - I1->mayReadOrWriteMemory() || I2->mayReadOrWriteMemory() || - !I1->hasOneUse() || !I2->hasOneUse() || !JointValueMap.count(InstPair)) - return Changed; - - // Check whether we should swap the operands of ICmpInst. - // TODO: Add support of communativity. - ICmpInst *ICmp1 = dyn_cast(I1), *ICmp2 = dyn_cast(I2); - bool SwapOpnds = false; - if (ICmp1 && ICmp2 && ICmp1->getOperand(0) != ICmp2->getOperand(0) && - ICmp1->getOperand(1) != ICmp2->getOperand(1) && - (ICmp1->getOperand(0) == ICmp2->getOperand(1) || - ICmp1->getOperand(1) == ICmp2->getOperand(0))) { - ICmp2->swapOperands(); - SwapOpnds = true; - } - if (!I1->isSameOperationAs(I2)) { - if (SwapOpnds) - ICmp2->swapOperands(); - return Changed; - } - - // The operands should be either the same or they need to be generated - // with a PHI node after sinking. We only handle the case where there is - // a single pair of different operands. - Value *DifferentOp1 = nullptr, *DifferentOp2 = nullptr; - unsigned Op1Idx = ~0U; - for (unsigned I = 0, E = I1->getNumOperands(); I != E; ++I) { - if (I1->getOperand(I) == I2->getOperand(I)) - continue; - // Early exit if we have more-than one pair of different operands or if - // we need a PHI node to replace a constant. - if (Op1Idx != ~0U || isa(I1->getOperand(I)) || - isa(I2->getOperand(I))) { - // If we can't sink the instructions, undo the swapping. - if (SwapOpnds) - ICmp2->swapOperands(); - return Changed; - } - DifferentOp1 = I1->getOperand(I); - Op1Idx = I; - DifferentOp2 = I2->getOperand(I); - } - - DEBUG(dbgs() << "SINK common instructions " << *I1 << "\n"); - DEBUG(dbgs() << " " << *I2 << "\n"); - - // We insert the pair of different operands to JointValueMap and - // remove (I1, I2) from JointValueMap. - if (Op1Idx != ~0U) { - auto &NewPN = JointValueMap[std::make_pair(DifferentOp1, DifferentOp2)]; - if (!NewPN) { - NewPN = - PHINode::Create(DifferentOp1->getType(), 2, - DifferentOp1->getName() + ".sink", &BBEnd->front()); - NewPN->addIncoming(DifferentOp1, BB1); - NewPN->addIncoming(DifferentOp2, BB2); - DEBUG(dbgs() << "Create PHI node " << *NewPN << "\n";); - } - // I1 should use NewPN instead of DifferentOp1. - I1->setOperand(Op1Idx, NewPN); - } - PHINode *OldPN = JointValueMap[InstPair]; - JointValueMap.erase(InstPair); - - // We need to update RE1 and RE2 if we are going to sink the first - // instruction in the basic block down. - bool UpdateRE1 = (I1 == &BB1->front()), UpdateRE2 = (I2 == &BB2->front()); - // Sink the instruction. - BBEnd->getInstList().splice(FirstNonPhiInBBEnd->getIterator(), - BB1->getInstList(), I1); - if (!OldPN->use_empty()) - OldPN->replaceAllUsesWith(I1); - OldPN->eraseFromParent(); - - if (!I2->use_empty()) - I2->replaceAllUsesWith(I1); - I1->intersectOptionalDataWith(I2); - // TODO: Use combineMetadata here to preserve what metadata we can - // (analogous to the hoisting case above). - I2->eraseFromParent(); - - if (UpdateRE1) - RE1 = BB1->getInstList().rend(); - if (UpdateRE2) - RE2 = BB2->getInstList().rend(); - FirstNonPhiInBBEnd = &*I1; + unsigned NumPHIsToInsert; + while (canSinkLastInstruction(Blocks, NumPHIsToInsert) && NumPHIsToInsert <= 1) { + sinkLastInstruction(Blocks); NumSinkCommons++; Changed = true; } diff --git a/test/CodeGen/ARM/avoid-cpsr-rmw.ll b/test/CodeGen/ARM/avoid-cpsr-rmw.ll index 79e8e68e2f5..78d3ebf371a 100644 --- a/test/CodeGen/ARM/avoid-cpsr-rmw.ll +++ b/test/CodeGen/ARM/avoid-cpsr-rmw.ll @@ -106,7 +106,7 @@ if.then: if.else: store i32 3, i32* %p, align 4 - %incdec.ptr5 = getelementptr inbounds i32, i32* %p, i32 2 + %incdec.ptr5 = getelementptr inbounds i32, i32* %p, i32 3 store i32 5, i32* %incdec.ptr1, align 4 store i32 6, i32* %incdec.ptr5, align 4 br label %if.end diff --git a/test/DebugInfo/ARM/single-constant-use-preserves-dbgloc.ll b/test/DebugInfo/ARM/single-constant-use-preserves-dbgloc.ll index a992ce3bf85..e22614388b8 100644 --- a/test/DebugInfo/ARM/single-constant-use-preserves-dbgloc.ll +++ b/test/DebugInfo/ARM/single-constant-use-preserves-dbgloc.ll @@ -9,8 +9,9 @@ ; return -1; ; } +; CHECK: mvnlt ; CHECK: .loc 1 6 7 -; CHECK: mvn +; CHECK: strlt target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "armv7--linux-gnueabihf" diff --git a/test/Transforms/SimplifyCFG/AArch64/prefer-fma.ll b/test/Transforms/SimplifyCFG/AArch64/prefer-fma.ll index cfbe219c4c2..7144da2d4bc 100644 --- a/test/Transforms/SimplifyCFG/AArch64/prefer-fma.ll +++ b/test/Transforms/SimplifyCFG/AArch64/prefer-fma.ll @@ -29,7 +29,8 @@ if.else: ; preds = %entry %4 = load double, double* %a, align 8 %mul1 = fmul fast double %1, %4 %sub1 = fsub fast double %mul1, %0 - store double %sub1, double* %y, align 8 + %gep1 = getelementptr double, double* %y, i32 1 + store double %sub1, double* %gep1, align 8 br label %if.end if.end: ; preds = %if.else, %if.then diff --git a/test/Transforms/SimplifyCFG/sink-common-code.ll b/test/Transforms/SimplifyCFG/sink-common-code.ll index cdb6ed29d85..b1b5ff7ddf7 100644 --- a/test/Transforms/SimplifyCFG/sink-common-code.ll +++ b/test/Transforms/SimplifyCFG/sink-common-code.ll @@ -81,3 +81,204 @@ if.end: ; CHECK: call ; CHECK: add ; CHECK-NOT: br + +define i32 @test4(i1 zeroext %flag, i32 %x, i32* %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %a = add i32 %x, 5 + store i32 %a, i32* %y + br label %if.end + +if.else: + %b = add i32 %x, 7 + store i32 %b, i32* %y + br label %if.end + +if.end: + ret i32 1 +} + +; CHECK-LABEL: test4 +; CHECK: select +; CHECK: store +; CHECK-NOT: store + +define i32 @test5(i1 zeroext %flag, i32 %x, i32* %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %a = add i32 %x, 5 + store volatile i32 %a, i32* %y + br label %if.end + +if.else: + %b = add i32 %x, 7 + store i32 %b, i32* %y + br label %if.end + +if.end: + ret i32 1 +} + +; CHECK-LABEL: test5 +; CHECK: store volatile +; CHECK: store + +define i32 @test6(i1 zeroext %flag, i32 %x, i32* %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %a = add i32 %x, 5 + store volatile i32 %a, i32* %y + br label %if.end + +if.else: + %b = add i32 %x, 7 + store volatile i32 %b, i32* %y + br label %if.end + +if.end: + ret i32 1 +} + +; CHECK-LABEL: test6 +; CHECK: select +; CHECK: store volatile +; CHECK-NOT: store + +define i32 @test7(i1 zeroext %flag, i32 %x, i32* %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %z = load volatile i32, i32* %y + %a = add i32 %z, 5 + store volatile i32 %a, i32* %y + br label %if.end + +if.else: + %w = load volatile i32, i32* %y + %b = add i32 %w, 7 + store volatile i32 %b, i32* %y + br label %if.end + +if.end: + ret i32 1 +} + +; CHECK-LABEL: test7 +; CHECK-DAG: select +; CHECK-DAG: load volatile +; CHECK: store volatile +; CHECK-NOT: load +; CHECK-NOT: store + +; %z and %w are in different blocks. We shouldn't sink the add because +; there may be intervening memory instructions. +define i32 @test8(i1 zeroext %flag, i32 %x, i32* %y) { +entry: + %z = load volatile i32, i32* %y + br i1 %flag, label %if.then, label %if.else + +if.then: + %a = add i32 %z, 5 + store volatile i32 %a, i32* %y + br label %if.end + +if.else: + %w = load volatile i32, i32* %y + %b = add i32 %w, 7 + store volatile i32 %b, i32* %y + br label %if.end + +if.end: + ret i32 1 +} + +; CHECK-LABEL: test8 +; CHECK: add +; CHECK: add + +; The extra store in %if.then means %z and %w are not equivalent. +define i32 @test9(i1 zeroext %flag, i32 %x, i32* %y, i32* %p) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + store i32 7, i32* %p + %z = load volatile i32, i32* %y + store i32 6, i32* %p + %a = add i32 %z, 5 + store volatile i32 %a, i32* %y + br label %if.end + +if.else: + %w = load volatile i32, i32* %y + %b = add i32 %w, 7 + store volatile i32 %b, i32* %y + br label %if.end + +if.end: + ret i32 1 +} + +; CHECK-LABEL: test9 +; CHECK: add +; CHECK: add + +%struct.anon = type { i32, i32 } + +; The GEP indexes a struct type so cannot have a variable last index. +define i32 @test10(i1 zeroext %flag, i32 %x, i32* %y, %struct.anon* %s) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %dummy = add i32 %x, 5 + %gepa = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 0 + store volatile i32 %x, i32* %gepa + br label %if.end + +if.else: + %dummy1 = add i32 %x, 6 + %gepb = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 1 + store volatile i32 %x, i32* %gepb + br label %if.end + +if.end: + ret i32 1 +} + +; CHECK-LABEL: test10 +; CHECK: getelementptr +; CHECK: getelementptr +; CHECK: phi +; CHECK: store volatile + +; The shufflevector's mask operand cannot be merged in a PHI. +define i32 @test11(i1 zeroext %flag, i32 %w, <2 x i32> %x, <2 x i32> %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %dummy = add i32 %w, 5 + %sv1 = shufflevector <2 x i32> %x, <2 x i32> %y, <2 x i32> + br label %if.end + +if.else: + %dummy1 = add i32 %w, 6 + %sv2 = shufflevector <2 x i32> %x, <2 x i32> %y, <2 x i32> + br label %if.end + +if.end: + %p = phi <2 x i32> [ %sv1, %if.then ], [ %sv2, %if.else ] + ret i32 1 +} + +; CHECK-LABEL: test11 +; CHECK: shufflevector +; CHECK: shufflevector