From 47533e9dd74bc59a47064c73f163ffc2f78de0e4 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Sat, 11 Apr 2015 00:33:08 +0000 Subject: [PATCH] [PowerPC] Fix PPCLoopPreIncPrep for depth > 1 loops This pass had the same problem as the data-prefetching pass: it was only checking for depth == 1 loops in practice. Fix that, add some debugging statements, and make sure that, when we grab an AddRec, it is for the loop we expect. llvm-svn: 234670 --- lib/Target/PowerPC/PPCLoopPreIncPrep.cpp | 37 ++++++++++++----- test/CodeGen/PowerPC/pip-inner.ll | 52 ++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 10 deletions(-) create mode 100644 test/CodeGen/PowerPC/pip-inner.ll diff --git a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp index 092a4efacb1..41cc15ab419 100644 --- a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp +++ b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp @@ -22,6 +22,7 @@ #define DEBUG_TYPE "ppc-loop-preinc-prep" #include "PPC.h" #include "PPCTargetMachine.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" @@ -143,8 +144,10 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) { bool MadeChange = false; - for (LoopInfo::iterator I = LI->begin(), E = LI->end(); - I != E; ++I) { + if (LI->empty()) + return MadeChange; + + for (auto I = df_begin(*LI->begin()), E = df_end(*LI->begin()); I != E; ++I) { Loop *L = *I; MadeChange |= runOnLoop(L); } @@ -159,16 +162,15 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { if (!L->empty()) return MadeChange; + DEBUG(dbgs() << "PIP: Examining: " << *L << "\n"); + BasicBlock *Header = L->getHeader(); const PPCSubtarget *ST = TM ? TM->getSubtargetImpl(*Header->getParent()) : nullptr; - unsigned HeaderLoopPredCount = 0; - for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); - PI != PE; ++PI) { - ++HeaderLoopPredCount; - } + unsigned HeaderLoopPredCount = + std::distance(pred_begin(Header), pred_end(Header)); // Collect buckets of comparable addresses used by loads and stores. typedef std::multimap Bucket; @@ -205,9 +207,13 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { if (L->isLoopInvariant(PtrValue)) continue; - const SCEV *LSCEV = SE->getSCEV(PtrValue); - if (!isa(LSCEV)) + const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L); + if (const SCEVAddRecExpr *LARSCEV = dyn_cast(LSCEV)) { + if (LARSCEV->getLoop() != L) + continue; + } else { continue; + } bool FoundBucket = false; for (unsigned i = 0, e = Buckets.size(); i != e; ++i) @@ -236,11 +242,16 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { // returns a value (which might contribute to determining the loop's // iteration space), insert a new preheader for the loop. if (!LoopPredecessor || - !LoopPredecessor->getTerminator()->getType()->isVoidTy()) + !LoopPredecessor->getTerminator()->getType()->isVoidTy()) { LoopPredecessor = InsertPreheaderForLoop(L, this); + if (LoopPredecessor) + MadeChange = true; + } if (!LoopPredecessor) return MadeChange; + DEBUG(dbgs() << "PIP: Found " << Buckets.size() << " buckets\n"); + SmallSet BBChanged; for (unsigned i = 0, e = Buckets.size(); i != e; ++i) { // The base address of each bucket is transformed into a phi and the others @@ -251,6 +262,10 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { if (!BasePtrSCEV->isAffine()) continue; + DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n"); + assert(BasePtrSCEV->getLoop() == L && + "AddRec for the wrong loop?"); + Instruction *MemI = Buckets[i].begin()->second; Value *BasePtr = GetPointerOperand(MemI); assert(BasePtr && "No pointer operand"); @@ -271,6 +286,8 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { if (!isSafeToExpand(BasePtrStartSCEV, *SE)) continue; + DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n"); + PHINode *NewPHI = PHINode::Create(I8PtrTy, HeaderLoopPredCount, MemI->hasName() ? MemI->getName() + ".phi" : "", Header->getFirstNonPHI()); diff --git a/test/CodeGen/PowerPC/pip-inner.ll b/test/CodeGen/PowerPC/pip-inner.ll new file mode 100644 index 00000000000..930f0d37147 --- /dev/null +++ b/test/CodeGen/PowerPC/pip-inner.ll @@ -0,0 +1,52 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +; Function Attrs: nounwind +define void @foo(double* %x, double* nocapture readonly %y) #0 { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.end, %entry + %i.015 = phi i32 [ 0, %entry ], [ %inc7, %for.end ] + br label %for.body3 + +for.body3: ; preds = %for.body3, %for.cond1.preheader + %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ] + %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv + %0 = load double, double* %arrayidx, align 8 + %add = fadd double %0, 1.000000e+00 + %arrayidx5 = getelementptr inbounds double, double* %x, i64 %indvars.iv + store double %add, double* %arrayidx5, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 16000 + br i1 %exitcond, label %for.end, label %for.body3 + +for.end: ; preds = %for.body3 + tail call void @bar(double* %x) #2 + %inc7 = add nuw nsw i32 %i.015, 1 + %exitcond16 = icmp eq i32 %inc7, 1000 + br i1 %exitcond16, label %for.end8, label %for.cond1.preheader + +for.end8: ; preds = %for.end + ret void + +; CHECK-LABEL: @foo + +; CHECK: lfdu [[REG1:[0-9]+]], 8({{[0-9]+}}) +; CHECK: fadd [[REG2:[0-9]+]], [[REG1]], {{[0-9]+}} +; CHECK: stfdu [[REG2]], 8({{[0-9]+}}) +; CHECK: bdnz + +; CHECK: bl bar +; CHECK-NEXT: nop + +; CHECK: blr +} + +declare void @bar(double*) #1 + +attributes #0 = { nounwind "target-cpu"="a2" } +attributes #1 = { "target-cpu"="a2" } +attributes #2 = { nounwind } +