diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h index 628058142e4..5ab58ca0646 100644 --- a/include/llvm/Analysis/TargetTransformInfo.h +++ b/include/llvm/Analysis/TargetTransformInfo.h @@ -97,7 +97,7 @@ struct HardwareLoopInfo { Loop *L = nullptr; BasicBlock *ExitBlock = nullptr; BranchInst *ExitBranch = nullptr; - const SCEV *TripCount = nullptr; + const SCEV *ExitCount = nullptr; IntegerType *CountType = nullptr; Value *LoopDecrement = nullptr; // Decrement the loop counter by this // value in every iteration. diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index 304d24fe8e4..9053acce60c 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -167,11 +167,7 @@ bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE, // Note that this block may not be the loop latch block, even if the loop // has a latch block. ExitBlock = BB; - TripCount = SE.getAddExpr(EC, SE.getOne(EC->getType())); - - if (!EC->getType()->isPointerTy() && EC->getType() != CountType) - TripCount = SE.getZeroExtendExpr(TripCount, CountType); - + ExitCount = EC; break; } diff --git a/lib/CodeGen/HardwareLoops.cpp b/lib/CodeGen/HardwareLoops.cpp index 4316034371a..248ef6c2397 100644 --- a/lib/CodeGen/HardwareLoops.cpp +++ b/lib/CodeGen/HardwareLoops.cpp @@ -187,7 +187,7 @@ namespace { const DataLayout &DL, OptimizationRemarkEmitter *ORE) : SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()), - TripCount(Info.TripCount), + ExitCount(Info.ExitCount), CountType(Info.CountType), ExitBranch(Info.ExitBranch), LoopDecrement(Info.LoopDecrement), @@ -202,7 +202,7 @@ namespace { OptimizationRemarkEmitter *ORE = nullptr; Loop *L = nullptr; Module *M = nullptr; - const SCEV *TripCount = nullptr; + const SCEV *ExitCount = nullptr; Type *CountType = nullptr; BranchInst *ExitBranch = nullptr; Value *LoopDecrement = nullptr; @@ -296,7 +296,7 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) { } assert( - (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.TripCount) && + (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) && "Hardware Loop must have set exit info."); BasicBlock *Preheader = L->getLoopPreheader(); @@ -381,13 +381,18 @@ Value *HardwareLoop::InitLoopCount() { // loop counter and tests that is not zero? SCEVExpander SCEVE(SE, DL, "loopcnt"); + if (!ExitCount->getType()->isPointerTy() && + ExitCount->getType() != CountType) + ExitCount = SE.getZeroExtendExpr(ExitCount, CountType); + + ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType)); // If we're trying to use the 'test and set' form of the intrinsic, we need // to replace a conditional branch that is controlling entry to the loop. It // is likely (guaranteed?) that the preheader has an unconditional branch to // the loop header, so also check if it has a single predecessor. - if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, TripCount, - SE.getZero(TripCount->getType()))) { + if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount, + SE.getZero(ExitCount->getType()))) { LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n"); UseLoopGuard |= ForceGuardLoopEntry; } else @@ -399,19 +404,19 @@ Value *HardwareLoop::InitLoopCount() { BasicBlock *Predecessor = BB->getSinglePredecessor(); // If it's not safe to create a while loop then don't force it and create a // do-while loop instead - if (!isSafeToExpandAt(TripCount, Predecessor->getTerminator(), SE)) + if (!isSafeToExpandAt(ExitCount, Predecessor->getTerminator(), SE)) UseLoopGuard = false; else BB = Predecessor; } - if (!isSafeToExpandAt(TripCount, BB->getTerminator(), SE)) { - LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand TripCount " << *TripCount - << "\n"); + if (!isSafeToExpandAt(ExitCount, BB->getTerminator(), SE)) { + LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount " + << *ExitCount << "\n"); return nullptr; } - Value *Count = SCEVE.expandCodeFor(TripCount, CountType, + Value *Count = SCEVE.expandCodeFor(ExitCount, CountType, BB->getTerminator()); // FIXME: We've expanded Count where we hope to insert the counter setting diff --git a/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll b/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll index 8f6717420eb..be15f456408 100644 --- a/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll +++ b/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll @@ -22,12 +22,14 @@ define i64 @foo(i8* %p, i32 signext %n, i32 signext %count) { ; CHECK-NEXT: cmpwi r4, 1 ; CHECK-NEXT: blt cr0, .LBB0_4 ; CHECK-NEXT: # %bb.1: # %for.body.preheader -; CHECK-NEXT: clrldi r4, r4, 32 +; CHECK-NEXT: addi r4, r4, -1 ; CHECK-NEXT: extsw r5, r5 ; CHECK-NEXT: li r6, 0 ; CHECK-NEXT: li r7, 5 -; CHECK-NEXT: mtctr r4 ; CHECK-NEXT: li r8, 9 +; CHECK-NEXT: clrldi r4, r4, 32 +; CHECK-NEXT: addi r4, r4, 1 +; CHECK-NEXT: mtctr r4 ; CHECK-NEXT: li r4, 0 ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB0_2: # %for.body @@ -93,21 +95,23 @@ define zeroext i8 @foo1(i8* %p, i32 signext %n, i32 signext %count) { ; CHECK-NEXT: cmpwi r4, 1 ; CHECK-NEXT: blt cr0, .LBB1_4 ; CHECK-NEXT: # %bb.1: # %for.body.preheader -; CHECK-NEXT: addi r6, r3, 1000 -; CHECK-NEXT: clrldi r3, r4, 32 +; CHECK-NEXT: addi r4, r4, -1 +; CHECK-NEXT: addi r3, r3, 1000 ; CHECK-NEXT: extsw r5, r5 +; CHECK-NEXT: li r6, 0 +; CHECK-NEXT: clrldi r4, r4, 32 +; CHECK-NEXT: addi r4, r4, 1 +; CHECK-NEXT: mtctr r4 ; CHECK-NEXT: li r4, 0 -; CHECK-NEXT: mtctr r3 -; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB1_2: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: lbzx r7, r6, r4 -; CHECK-NEXT: add r4, r4, r5 -; CHECK-NEXT: add r3, r7, r3 +; CHECK-NEXT: lbzx r7, r3, r6 +; CHECK-NEXT: add r6, r6, r5 +; CHECK-NEXT: add r4, r7, r4 ; CHECK-NEXT: bdnz .LBB1_2 ; CHECK-NEXT: # %bb.3: # %for.cond.cleanup -; CHECK-NEXT: clrldi r3, r3, 56 +; CHECK-NEXT: clrldi r3, r4, 56 ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB1_4: ; CHECK-NEXT: li r3, 0 diff --git a/test/CodeGen/PowerPC/mma-phi-accs.ll b/test/CodeGen/PowerPC/mma-phi-accs.ll index f5b05f40e6b..26cf24382dc 100644 --- a/test/CodeGen/PowerPC/mma-phi-accs.ll +++ b/test/CodeGen/PowerPC/mma-phi-accs.ll @@ -263,7 +263,9 @@ define dso_local signext i32 @testNestedPHI(i32 signext %cond, i32 signext %coun ; CHECK-NEXT: cmpwi r4, 1 ; CHECK-NEXT: blt cr0, .LBB3_5 ; CHECK-NEXT: .LBB3_3: # %for.body.preheader -; CHECK-NEXT: clrldi r3, r4, 32 +; CHECK-NEXT: addi r3, r4, -1 +; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: addi r3, r3, 1 ; CHECK-NEXT: mtctr r3 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB3_4: # %for.body @@ -293,7 +295,9 @@ define dso_local signext i32 @testNestedPHI(i32 signext %cond, i32 signext %coun ; CHECK-BE-NEXT: cmpwi r4, 1 ; CHECK-BE-NEXT: blt cr0, .LBB3_5 ; CHECK-BE-NEXT: .LBB3_3: # %for.body.preheader -; CHECK-BE-NEXT: clrldi r3, r4, 32 +; CHECK-BE-NEXT: addi r3, r4, -1 +; CHECK-BE-NEXT: clrldi r3, r3, 32 +; CHECK-BE-NEXT: addi r3, r3, 1 ; CHECK-BE-NEXT: mtctr r3 ; CHECK-BE-NEXT: .p2align 4 ; CHECK-BE-NEXT: .LBB3_4: # %for.body