1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 02:33:06 +01:00

Revert "[HardwareLoops] Change order of SCEV expression construction for InitLoopCount."

This causes https://bugs.llvm.org/show_bug.cgi?id=51714 and
is not the right fix according to the review comments in D91724.

This reverts commit 42eaf4fe0adef3344adfd9fbccd49f325cb549ef.

(cherry picked from commit 34badc409cc452575c538c4b6449546adc38f121)
This commit is contained in:
Chen Zheng 2021-09-03 02:53:31 +00:00 committed by Tom Stellard
parent d21237cb11
commit 921995afd5
5 changed files with 37 additions and 28 deletions

View File

@ -97,7 +97,7 @@ struct HardwareLoopInfo {
Loop *L = nullptr; Loop *L = nullptr;
BasicBlock *ExitBlock = nullptr; BasicBlock *ExitBlock = nullptr;
BranchInst *ExitBranch = nullptr; BranchInst *ExitBranch = nullptr;
const SCEV *TripCount = nullptr; const SCEV *ExitCount = nullptr;
IntegerType *CountType = nullptr; IntegerType *CountType = nullptr;
Value *LoopDecrement = nullptr; // Decrement the loop counter by this Value *LoopDecrement = nullptr; // Decrement the loop counter by this
// value in every iteration. // value in every iteration.

View File

@ -167,11 +167,7 @@ bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE,
// Note that this block may not be the loop latch block, even if the loop // Note that this block may not be the loop latch block, even if the loop
// has a latch block. // has a latch block.
ExitBlock = BB; ExitBlock = BB;
TripCount = SE.getAddExpr(EC, SE.getOne(EC->getType())); ExitCount = EC;
if (!EC->getType()->isPointerTy() && EC->getType() != CountType)
TripCount = SE.getZeroExtendExpr(TripCount, CountType);
break; break;
} }

View File

@ -187,7 +187,7 @@ namespace {
const DataLayout &DL, const DataLayout &DL,
OptimizationRemarkEmitter *ORE) : OptimizationRemarkEmitter *ORE) :
SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()), SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()),
TripCount(Info.TripCount), ExitCount(Info.ExitCount),
CountType(Info.CountType), CountType(Info.CountType),
ExitBranch(Info.ExitBranch), ExitBranch(Info.ExitBranch),
LoopDecrement(Info.LoopDecrement), LoopDecrement(Info.LoopDecrement),
@ -202,7 +202,7 @@ namespace {
OptimizationRemarkEmitter *ORE = nullptr; OptimizationRemarkEmitter *ORE = nullptr;
Loop *L = nullptr; Loop *L = nullptr;
Module *M = nullptr; Module *M = nullptr;
const SCEV *TripCount = nullptr; const SCEV *ExitCount = nullptr;
Type *CountType = nullptr; Type *CountType = nullptr;
BranchInst *ExitBranch = nullptr; BranchInst *ExitBranch = nullptr;
Value *LoopDecrement = nullptr; Value *LoopDecrement = nullptr;
@ -296,7 +296,7 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
} }
assert( assert(
(HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.TripCount) && (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
"Hardware Loop must have set exit info."); "Hardware Loop must have set exit info.");
BasicBlock *Preheader = L->getLoopPreheader(); BasicBlock *Preheader = L->getLoopPreheader();
@ -381,13 +381,18 @@ Value *HardwareLoop::InitLoopCount() {
// loop counter and tests that is not zero? // loop counter and tests that is not zero?
SCEVExpander SCEVE(SE, DL, "loopcnt"); SCEVExpander SCEVE(SE, DL, "loopcnt");
if (!ExitCount->getType()->isPointerTy() &&
ExitCount->getType() != CountType)
ExitCount = SE.getZeroExtendExpr(ExitCount, CountType);
ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType));
// If we're trying to use the 'test and set' form of the intrinsic, we need // If we're trying to use the 'test and set' form of the intrinsic, we need
// to replace a conditional branch that is controlling entry to the loop. It // to replace a conditional branch that is controlling entry to the loop. It
// is likely (guaranteed?) that the preheader has an unconditional branch to // is likely (guaranteed?) that the preheader has an unconditional branch to
// the loop header, so also check if it has a single predecessor. // the loop header, so also check if it has a single predecessor.
if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, TripCount, if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount,
SE.getZero(TripCount->getType()))) { SE.getZero(ExitCount->getType()))) {
LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n"); LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
UseLoopGuard |= ForceGuardLoopEntry; UseLoopGuard |= ForceGuardLoopEntry;
} else } else
@ -399,19 +404,19 @@ Value *HardwareLoop::InitLoopCount() {
BasicBlock *Predecessor = BB->getSinglePredecessor(); BasicBlock *Predecessor = BB->getSinglePredecessor();
// If it's not safe to create a while loop then don't force it and create a // If it's not safe to create a while loop then don't force it and create a
// do-while loop instead // do-while loop instead
if (!isSafeToExpandAt(TripCount, Predecessor->getTerminator(), SE)) if (!isSafeToExpandAt(ExitCount, Predecessor->getTerminator(), SE))
UseLoopGuard = false; UseLoopGuard = false;
else else
BB = Predecessor; BB = Predecessor;
} }
if (!isSafeToExpandAt(TripCount, BB->getTerminator(), SE)) { if (!isSafeToExpandAt(ExitCount, BB->getTerminator(), SE)) {
LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand TripCount " << *TripCount LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "
<< "\n"); << *ExitCount << "\n");
return nullptr; return nullptr;
} }
Value *Count = SCEVE.expandCodeFor(TripCount, CountType, Value *Count = SCEVE.expandCodeFor(ExitCount, CountType,
BB->getTerminator()); BB->getTerminator());
// FIXME: We've expanded Count where we hope to insert the counter setting // FIXME: We've expanded Count where we hope to insert the counter setting

View File

@ -22,12 +22,14 @@ define i64 @foo(i8* %p, i32 signext %n, i32 signext %count) {
; CHECK-NEXT: cmpwi r4, 1 ; CHECK-NEXT: cmpwi r4, 1
; CHECK-NEXT: blt cr0, .LBB0_4 ; CHECK-NEXT: blt cr0, .LBB0_4
; CHECK-NEXT: # %bb.1: # %for.body.preheader ; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: clrldi r4, r4, 32 ; CHECK-NEXT: addi r4, r4, -1
; CHECK-NEXT: extsw r5, r5 ; CHECK-NEXT: extsw r5, r5
; CHECK-NEXT: li r6, 0 ; CHECK-NEXT: li r6, 0
; CHECK-NEXT: li r7, 5 ; CHECK-NEXT: li r7, 5
; CHECK-NEXT: mtctr r4
; CHECK-NEXT: li r8, 9 ; CHECK-NEXT: li r8, 9
; CHECK-NEXT: clrldi r4, r4, 32
; CHECK-NEXT: addi r4, r4, 1
; CHECK-NEXT: mtctr r4
; CHECK-NEXT: li r4, 0 ; CHECK-NEXT: li r4, 0
; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB0_2: # %for.body ; CHECK-NEXT: .LBB0_2: # %for.body
@ -93,21 +95,23 @@ define zeroext i8 @foo1(i8* %p, i32 signext %n, i32 signext %count) {
; CHECK-NEXT: cmpwi r4, 1 ; CHECK-NEXT: cmpwi r4, 1
; CHECK-NEXT: blt cr0, .LBB1_4 ; CHECK-NEXT: blt cr0, .LBB1_4
; CHECK-NEXT: # %bb.1: # %for.body.preheader ; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: addi r6, r3, 1000 ; CHECK-NEXT: addi r4, r4, -1
; CHECK-NEXT: clrldi r3, r4, 32 ; CHECK-NEXT: addi r3, r3, 1000
; CHECK-NEXT: extsw r5, r5 ; CHECK-NEXT: extsw r5, r5
; CHECK-NEXT: li r6, 0
; CHECK-NEXT: clrldi r4, r4, 32
; CHECK-NEXT: addi r4, r4, 1
; CHECK-NEXT: mtctr r4
; CHECK-NEXT: li r4, 0 ; CHECK-NEXT: li r4, 0
; CHECK-NEXT: mtctr r3
; CHECK-NEXT: li r3, 0
; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB1_2: # %for.body ; CHECK-NEXT: .LBB1_2: # %for.body
; CHECK-NEXT: # ; CHECK-NEXT: #
; CHECK-NEXT: lbzx r7, r6, r4 ; CHECK-NEXT: lbzx r7, r3, r6
; CHECK-NEXT: add r4, r4, r5 ; CHECK-NEXT: add r6, r6, r5
; CHECK-NEXT: add r3, r7, r3 ; CHECK-NEXT: add r4, r7, r4
; CHECK-NEXT: bdnz .LBB1_2 ; CHECK-NEXT: bdnz .LBB1_2
; CHECK-NEXT: # %bb.3: # %for.cond.cleanup ; CHECK-NEXT: # %bb.3: # %for.cond.cleanup
; CHECK-NEXT: clrldi r3, r3, 56 ; CHECK-NEXT: clrldi r3, r4, 56
; CHECK-NEXT: blr ; CHECK-NEXT: blr
; CHECK-NEXT: .LBB1_4: ; CHECK-NEXT: .LBB1_4:
; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: li r3, 0

View File

@ -263,7 +263,9 @@ define dso_local signext i32 @testNestedPHI(i32 signext %cond, i32 signext %coun
; CHECK-NEXT: cmpwi r4, 1 ; CHECK-NEXT: cmpwi r4, 1
; CHECK-NEXT: blt cr0, .LBB3_5 ; CHECK-NEXT: blt cr0, .LBB3_5
; CHECK-NEXT: .LBB3_3: # %for.body.preheader ; CHECK-NEXT: .LBB3_3: # %for.body.preheader
; CHECK-NEXT: clrldi r3, r4, 32 ; CHECK-NEXT: addi r3, r4, -1
; CHECK-NEXT: clrldi r3, r3, 32
; CHECK-NEXT: addi r3, r3, 1
; CHECK-NEXT: mtctr r3 ; CHECK-NEXT: mtctr r3
; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB3_4: # %for.body ; CHECK-NEXT: .LBB3_4: # %for.body
@ -293,7 +295,9 @@ define dso_local signext i32 @testNestedPHI(i32 signext %cond, i32 signext %coun
; CHECK-BE-NEXT: cmpwi r4, 1 ; CHECK-BE-NEXT: cmpwi r4, 1
; CHECK-BE-NEXT: blt cr0, .LBB3_5 ; CHECK-BE-NEXT: blt cr0, .LBB3_5
; CHECK-BE-NEXT: .LBB3_3: # %for.body.preheader ; CHECK-BE-NEXT: .LBB3_3: # %for.body.preheader
; CHECK-BE-NEXT: clrldi r3, r4, 32 ; CHECK-BE-NEXT: addi r3, r4, -1
; CHECK-BE-NEXT: clrldi r3, r3, 32
; CHECK-BE-NEXT: addi r3, r3, 1
; CHECK-BE-NEXT: mtctr r3 ; CHECK-BE-NEXT: mtctr r3
; CHECK-BE-NEXT: .p2align 4 ; CHECK-BE-NEXT: .p2align 4
; CHECK-BE-NEXT: .LBB3_4: # %for.body ; CHECK-BE-NEXT: .LBB3_4: # %for.body