From 88bae728147442244a4010ce6b3c5e3a607500d9 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 26 May 2021 08:40:01 -0700 Subject: [PATCH] [unroll] Use value domain for symbolic execution based cost model The current full unroll cost model does a symbolic evaluation of the loop up to a fixed limit. That symbolic evaluation currently simplifies to constants, but we can generalize to arbitrary Values using the InstructionSimplify infrastructure at very low cost. By itself, this enables some simplifications, but it's mainly useful when combined with the branch simplification over in D102928. Differential Revision: https://reviews.llvm.org/D102934 --- include/llvm/Analysis/LoopUnrollAnalyzer.h | 7 +- lib/Analysis/LoopUnrollAnalyzer.cpp | 45 +- lib/Transforms/Scalar/LoopUnrollPass.cpp | 24 +- .../unroll-cost-symbolic-execute.ll | 430 ++++++++++++++++-- unittests/Analysis/UnrollAnalyzerTest.cpp | 4 +- 5 files changed, 423 insertions(+), 87 deletions(-) diff --git a/include/llvm/Analysis/LoopUnrollAnalyzer.h b/include/llvm/Analysis/LoopUnrollAnalyzer.h index 9226dd45580..7cf8a081f9a 100644 --- a/include/llvm/Analysis/LoopUnrollAnalyzer.h +++ b/include/llvm/Analysis/LoopUnrollAnalyzer.h @@ -46,7 +46,7 @@ class UnrolledInstAnalyzer : private InstVisitor { public: UnrolledInstAnalyzer(unsigned Iteration, - DenseMap &SimplifiedValues, + DenseMap &SimplifiedValues, ScalarEvolution &SE, const Loop *L) : SimplifiedValues(SimplifiedValues), SE(SE), L(L) { IterationNumber = SE.getConstant(APInt(64, Iteration)); @@ -68,15 +68,12 @@ private: /// iteration. const SCEV *IterationNumber; - /// A Value->Constant map for keeping values that we managed to - /// constant-fold on the given iteration. - /// /// While we walk the loop instructions, we build up and maintain a mapping /// of simplified values specific to this iteration. The idea is to propagate /// any special information we have about loads that can be replaced with /// constants after complete unrolling, and account for likely simplifications /// post-unrolling. - DenseMap &SimplifiedValues; + DenseMap &SimplifiedValues; ScalarEvolution &SE; const Loop *L; diff --git a/lib/Analysis/LoopUnrollAnalyzer.cpp b/lib/Analysis/LoopUnrollAnalyzer.cpp index 68d769664c6..15095d67d38 100644 --- a/lib/Analysis/LoopUnrollAnalyzer.cpp +++ b/lib/Analysis/LoopUnrollAnalyzer.cpp @@ -74,10 +74,10 @@ bool UnrolledInstAnalyzer::simplifyInstWithSCEV(Instruction *I) { bool UnrolledInstAnalyzer::visitBinaryOperator(BinaryOperator &I) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); if (!isa(LHS)) - if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) + if (Value *SimpleLHS = SimplifiedValues.lookup(LHS)) LHS = SimpleLHS; if (!isa(RHS)) - if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) + if (Value *SimpleRHS = SimplifiedValues.lookup(RHS)) RHS = SimpleRHS; Value *SimpleV = nullptr; @@ -88,11 +88,10 @@ bool UnrolledInstAnalyzer::visitBinaryOperator(BinaryOperator &I) { else SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL); - if (Constant *C = dyn_cast_or_null(SimpleV)) - SimplifiedValues[&I] = C; - - if (SimpleV) + if (SimpleV) { + SimplifiedValues[&I] = SimpleV; return true; + } return Base::visitBinaryOperator(I); } @@ -147,20 +146,17 @@ bool UnrolledInstAnalyzer::visitLoad(LoadInst &I) { /// Try to simplify cast instruction. bool UnrolledInstAnalyzer::visitCastInst(CastInst &I) { - // Propagate constants through casts. - Constant *COp = dyn_cast(I.getOperand(0)); - if (!COp) - COp = SimplifiedValues.lookup(I.getOperand(0)); + Value *Op = I.getOperand(0); + if (Value *Simplified = SimplifiedValues.lookup(Op)) + Op = Simplified; - // If we know a simplified value for this operand and cast is valid, save the - // result to SimplifiedValues. // The cast can be invalid, because SimplifiedValues contains results of SCEV // analysis, which operates on integers (and, e.g., might convert i8* null to // i32 0). - if (COp && CastInst::castIsValid(I.getOpcode(), COp, I.getType())) { - if (Constant *C = - ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) { - SimplifiedValues[&I] = C; + if (CastInst::castIsValid(I.getOpcode(), Op, I.getType())) { + const DataLayout &DL = I.getModule()->getDataLayout(); + if (Value *V = SimplifyCastInst(I.getOpcode(), Op, I.getType(), DL)) { + SimplifiedValues[&I] = V; return true; } } @@ -174,10 +170,10 @@ bool UnrolledInstAnalyzer::visitCmpInst(CmpInst &I) { // First try to handle simplified comparisons. if (!isa(LHS)) - if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) + if (Value *SimpleLHS = SimplifiedValues.lookup(LHS)) LHS = SimpleLHS; if (!isa(RHS)) - if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) + if (Value *SimpleRHS = SimplifiedValues.lookup(RHS)) RHS = SimpleRHS; if (!isa(LHS) && !isa(RHS)) { @@ -195,15 +191,10 @@ bool UnrolledInstAnalyzer::visitCmpInst(CmpInst &I) { } } - if (Constant *CLHS = dyn_cast(LHS)) { - if (Constant *CRHS = dyn_cast(RHS)) { - if (CLHS->getType() == CRHS->getType()) { - if (Constant *C = ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) { - SimplifiedValues[&I] = C; - return true; - } - } - } + const DataLayout &DL = I.getModule()->getDataLayout(); + if (Value *V = SimplifyCmpInst(I.getPredicate(), LHS, RHS, DL)) { + SimplifiedValues[&I] = V; + return true; } return Base::visitCmpInst(I); diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index c9d27de1231..71a2b89decf 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -356,8 +356,8 @@ static Optional analyzeLoopUnrollCost( SmallSetVector BBWorklist; SmallSetVector, 4> ExitWorklist; - DenseMap SimplifiedValues; - SmallVector, 4> SimplifiedInputValues; + DenseMap SimplifiedValues; + SmallVector, 4> SimplifiedInputValues; // The estimated cost of the unrolled form of the loop. We try to estimate // this by simplifying as much as we can while computing the estimate. @@ -498,11 +498,9 @@ static Optional analyzeLoopUnrollCost( Value *V = PHI->getIncomingValueForBlock( Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch()); - Constant *C = dyn_cast(V); - if (Iteration != 0 && !C) - C = SimplifiedValues.lookup(V); - if (C) - SimplifiedInputValues.push_back({PHI, C}); + if (Iteration != 0 && SimplifiedValues.count(V)) + V = SimplifiedValues.lookup(V); + SimplifiedInputValues.push_back({PHI, V}); } // Now clear and re-populate the map for the next iteration. @@ -571,13 +569,18 @@ static Optional analyzeLoopUnrollCost( Instruction *TI = BB->getTerminator(); + auto getSimplifiedConstant = [&](Value *V) -> Constant * { + if (SimplifiedValues.count(V)) + V = SimplifiedValues.lookup(V); + return dyn_cast(V); + }; + // Add in the live successors by first checking whether we have terminator // that may be simplified based on the values simplified by this call. BasicBlock *KnownSucc = nullptr; if (BranchInst *BI = dyn_cast(TI)) { if (BI->isConditional()) { - if (Constant *SimpleCond = - SimplifiedValues.lookup(BI->getCondition())) { + if (auto *SimpleCond = getSimplifiedConstant(BI->getCondition())) { // Just take the first successor if condition is undef if (isa(SimpleCond)) KnownSucc = BI->getSuccessor(0); @@ -587,8 +590,7 @@ static Optional analyzeLoopUnrollCost( } } } else if (SwitchInst *SI = dyn_cast(TI)) { - if (Constant *SimpleCond = - SimplifiedValues.lookup(SI->getCondition())) { + if (auto *SimpleCond = getSimplifiedConstant(SI->getCondition())) { // Just take the first successor if condition is undef if (isa(SimpleCond)) KnownSucc = SI->getSuccessor(0); diff --git a/test/Transforms/LoopUnroll/unroll-cost-symbolic-execute.ll b/test/Transforms/LoopUnroll/unroll-cost-symbolic-execute.ll index 865f0937bac..e4b6423ae64 100644 --- a/test/Transforms/LoopUnroll/unroll-cost-symbolic-execute.ll +++ b/test/Transforms/LoopUnroll/unroll-cost-symbolic-execute.ll @@ -12,37 +12,250 @@ define i32 @test_symbolic_simplify(i32 %limit) { ; CHECK-LABEL: @test_symbolic_simplify( ; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[LOOP_PEEL_BEGIN:%.*]] -; CHECK: loop.peel.begin: -; CHECK-NEXT: br label [[LOOP_PEEL:%.*]] -; CHECK: loop.peel: -; CHECK-NEXT: [[SUB_PEEL:%.*]] = sub i32 [[LIMIT:%.*]], 0 -; CHECK-NEXT: [[CMP_PEEL:%.*]] = icmp eq i32 [[SUB_PEEL]], [[LIMIT]] -; CHECK-NEXT: [[ZEXT_PEEL:%.*]] = sext i1 [[CMP_PEEL]] to i32 -; CHECK-NEXT: store i32 [[ZEXT_PEEL]], i32* @G, align 4 -; CHECK-NEXT: [[IV_NEXT_PEEL:%.*]] = add i32 0, 1 -; CHECK-NEXT: [[LOOP_COND_PEEL:%.*]] = icmp ne i32 0, 80 -; CHECK-NEXT: br i1 [[LOOP_COND_PEEL]], label [[LOOP_PEEL_NEXT:%.*]], label [[DONE:%.*]] -; CHECK: loop.peel.next: -; CHECK-NEXT: br label [[LOOP_PEEL_NEXT1:%.*]] -; CHECK: loop.peel.next1: -; CHECK-NEXT: br label [[ENTRY_PEEL_NEWPH:%.*]] -; CHECK: entry.peel.newph: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 0, [[LIMIT]] -; CHECK-NEXT: [[ZEXT:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: store i32 [[ZEXT]], i32* @G, align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ne i32 [[IV]], 80 -; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[DONE_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]] -; CHECK: done.loopexit: -; CHECK-NEXT: [[ZEXT_LCSSA_PH:%.*]] = phi i32 [ [[ZEXT]], [[LOOP]] ] -; CHECK-NEXT: br label [[DONE]] -; CHECK: done: -; CHECK-NEXT: [[ZEXT_LCSSA:%.*]] = phi i32 [ [[ZEXT_PEEL]], [[LOOP_PEEL]] ], [ [[ZEXT_LCSSA_PH]], [[DONE_LOOPEXIT]] ] -; CHECK-NEXT: ret i32 [[ZEXT_LCSSA]] +; CHECK-NEXT: store i32 -1, i32* @G, align 4 +; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i32 0, [[LIMIT:%.*]] +; CHECK-NEXT: [[ZEXT_1:%.*]] = sext i1 [[CMP_1]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_1]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_2:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_2:%.*]] = sext i1 [[CMP_2]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_2]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_3:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_3:%.*]] = sext i1 [[CMP_3]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_3]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_4:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_4:%.*]] = sext i1 [[CMP_4]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_4]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_5:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_5:%.*]] = sext i1 [[CMP_5]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_5]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_6:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_6:%.*]] = sext i1 [[CMP_6]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_6]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_7:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_7:%.*]] = sext i1 [[CMP_7]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_7]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_8:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_8:%.*]] = sext i1 [[CMP_8]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_8]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_9:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_9:%.*]] = sext i1 [[CMP_9]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_9]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_10:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_10:%.*]] = sext i1 [[CMP_10]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_10]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_11:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_11:%.*]] = sext i1 [[CMP_11]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_11]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_12:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_12:%.*]] = sext i1 [[CMP_12]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_12]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_13:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_13:%.*]] = sext i1 [[CMP_13]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_13]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_14:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_14:%.*]] = sext i1 [[CMP_14]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_14]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_15:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_15:%.*]] = sext i1 [[CMP_15]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_15]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_16:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_16:%.*]] = sext i1 [[CMP_16]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_16]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_17:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_17:%.*]] = sext i1 [[CMP_17]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_17]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_18:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_18:%.*]] = sext i1 [[CMP_18]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_18]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_19:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_19:%.*]] = sext i1 [[CMP_19]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_19]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_20:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_20:%.*]] = sext i1 [[CMP_20]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_20]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_21:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_21:%.*]] = sext i1 [[CMP_21]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_21]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_22:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_22:%.*]] = sext i1 [[CMP_22]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_22]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_23:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_23:%.*]] = sext i1 [[CMP_23]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_23]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_24:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_24:%.*]] = sext i1 [[CMP_24]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_24]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_25:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_25:%.*]] = sext i1 [[CMP_25]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_25]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_26:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_26:%.*]] = sext i1 [[CMP_26]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_26]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_27:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_27:%.*]] = sext i1 [[CMP_27]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_27]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_28:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_28:%.*]] = sext i1 [[CMP_28]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_28]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_29:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_29:%.*]] = sext i1 [[CMP_29]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_29]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_30:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_30:%.*]] = sext i1 [[CMP_30]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_30]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_31:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_31:%.*]] = sext i1 [[CMP_31]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_31]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_32:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_32:%.*]] = sext i1 [[CMP_32]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_32]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_33:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_33:%.*]] = sext i1 [[CMP_33]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_33]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_34:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_34:%.*]] = sext i1 [[CMP_34]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_34]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_35:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_35:%.*]] = sext i1 [[CMP_35]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_35]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_36:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_36:%.*]] = sext i1 [[CMP_36]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_36]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_37:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_37:%.*]] = sext i1 [[CMP_37]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_37]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_38:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_38:%.*]] = sext i1 [[CMP_38]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_38]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_39:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_39:%.*]] = sext i1 [[CMP_39]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_39]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_40:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_40:%.*]] = sext i1 [[CMP_40]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_40]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_41:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_41:%.*]] = sext i1 [[CMP_41]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_41]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_42:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_42:%.*]] = sext i1 [[CMP_42]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_42]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_43:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_43:%.*]] = sext i1 [[CMP_43]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_43]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_44:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_44:%.*]] = sext i1 [[CMP_44]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_44]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_45:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_45:%.*]] = sext i1 [[CMP_45]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_45]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_46:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_46:%.*]] = sext i1 [[CMP_46]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_46]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_47:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_47:%.*]] = sext i1 [[CMP_47]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_47]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_48:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_48:%.*]] = sext i1 [[CMP_48]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_48]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_49:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_49:%.*]] = sext i1 [[CMP_49]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_49]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_50:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_50:%.*]] = sext i1 [[CMP_50]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_50]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_51:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_51:%.*]] = sext i1 [[CMP_51]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_51]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_52:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_52:%.*]] = sext i1 [[CMP_52]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_52]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_53:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_53:%.*]] = sext i1 [[CMP_53]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_53]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_54:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_54:%.*]] = sext i1 [[CMP_54]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_54]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_55:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_55:%.*]] = sext i1 [[CMP_55]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_55]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_56:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_56:%.*]] = sext i1 [[CMP_56]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_56]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_57:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_57:%.*]] = sext i1 [[CMP_57]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_57]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_58:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_58:%.*]] = sext i1 [[CMP_58]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_58]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_59:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_59:%.*]] = sext i1 [[CMP_59]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_59]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_60:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_60:%.*]] = sext i1 [[CMP_60]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_60]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_61:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_61:%.*]] = sext i1 [[CMP_61]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_61]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_62:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_62:%.*]] = sext i1 [[CMP_62]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_62]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_63:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_63:%.*]] = sext i1 [[CMP_63]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_63]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_64:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_64:%.*]] = sext i1 [[CMP_64]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_64]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_65:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_65:%.*]] = sext i1 [[CMP_65]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_65]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_66:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_66:%.*]] = sext i1 [[CMP_66]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_66]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_67:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_67:%.*]] = sext i1 [[CMP_67]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_67]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_68:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_68:%.*]] = sext i1 [[CMP_68]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_68]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_69:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_69:%.*]] = sext i1 [[CMP_69]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_69]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_70:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_70:%.*]] = sext i1 [[CMP_70]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_70]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_71:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_71:%.*]] = sext i1 [[CMP_71]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_71]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_72:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_72:%.*]] = sext i1 [[CMP_72]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_72]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_73:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_73:%.*]] = sext i1 [[CMP_73]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_73]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_74:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_74:%.*]] = sext i1 [[CMP_74]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_74]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_75:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_75:%.*]] = sext i1 [[CMP_75]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_75]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_76:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_76:%.*]] = sext i1 [[CMP_76]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_76]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_77:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_77:%.*]] = sext i1 [[CMP_77]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_77]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_78:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_78:%.*]] = sext i1 [[CMP_78]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_78]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_79:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_79:%.*]] = sext i1 [[CMP_79]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_79]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_80:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_80:%.*]] = sext i1 [[CMP_80]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_80]], i32* @G, align 4 +; CHECK-NEXT: ret i32 [[ZEXT_80]] ; entry: br label %loop @@ -69,11 +282,7 @@ define i32 @test_symbolic_path(i32 %limit) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[SUM_NEXT:%.*]], [[BACKEDGE:%.*]] ] -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE]] ] -; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[LIMIT:%.*]], [[SUM]] -; CHECK-NEXT: [[IS_POSITIVE:%.*]] = icmp eq i32 [[SUB]], [[LIMIT]] -; CHECK-NEXT: br i1 [[IS_POSITIVE]], label [[BACKEDGE]], label [[IF_FALSE:%.*]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE:%.*]], label [[IF_FALSE:%.*]] ; CHECK: if.false: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: call void @foo() @@ -91,14 +300,151 @@ define i32 @test_symbolic_path(i32 %limit) { ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[BACKEDGE]] ; CHECK: backedge: -; CHECK-NEXT: [[HIDDEN_ZERO:%.*]] = sub i32 [[LIMIT]], [[SUB]] -; CHECK-NEXT: [[SUM_NEXT]] = add i32 [[SUM]], [[HIDDEN_ZERO]] -; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ne i32 [[IV]], 8 -; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[DONE:%.*]] -; CHECK: done: -; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i32 [ [[SUM_NEXT]], [[BACKEDGE]] ] -; CHECK-NEXT: ret i32 [[SUM_NEXT_LCSSA]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE_1:%.*]], label [[IF_FALSE_1:%.*]] +; CHECK: if.false.1: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BACKEDGE_1]] +; CHECK: backedge.1: +; CHECK-NEXT: br i1 true, label [[BACKEDGE_2:%.*]], label [[IF_FALSE_2:%.*]] +; CHECK: if.false.2: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BACKEDGE_2]] +; CHECK: backedge.2: +; CHECK-NEXT: br i1 true, label [[BACKEDGE_3:%.*]], label [[IF_FALSE_3:%.*]] +; CHECK: if.false.3: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BACKEDGE_3]] +; CHECK: backedge.3: +; CHECK-NEXT: br i1 true, label [[BACKEDGE_4:%.*]], label [[IF_FALSE_4:%.*]] +; CHECK: if.false.4: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BACKEDGE_4]] +; CHECK: backedge.4: +; CHECK-NEXT: br i1 true, label [[BACKEDGE_5:%.*]], label [[IF_FALSE_5:%.*]] +; CHECK: if.false.5: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BACKEDGE_5]] +; CHECK: backedge.5: +; CHECK-NEXT: br i1 true, label [[BACKEDGE_6:%.*]], label [[IF_FALSE_6:%.*]] +; CHECK: if.false.6: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BACKEDGE_6]] +; CHECK: backedge.6: +; CHECK-NEXT: br i1 true, label [[BACKEDGE_7:%.*]], label [[IF_FALSE_7:%.*]] +; CHECK: if.false.7: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BACKEDGE_7]] +; CHECK: backedge.7: +; CHECK-NEXT: br i1 true, label [[BACKEDGE_8:%.*]], label [[IF_FALSE_8:%.*]] +; CHECK: if.false.8: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BACKEDGE_8]] +; CHECK: backedge.8: +; CHECK-NEXT: ret i32 0 ; entry: br label %loop diff --git a/unittests/Analysis/UnrollAnalyzerTest.cpp b/unittests/Analysis/UnrollAnalyzerTest.cpp index ac00b5d8a7b..2f9135d4fb2 100644 --- a/unittests/Analysis/UnrollAnalyzerTest.cpp +++ b/unittests/Analysis/UnrollAnalyzerTest.cpp @@ -19,7 +19,7 @@ using namespace llvm; namespace llvm { void initializeUnrollAnalyzerTestPass(PassRegistry &); -static SmallVector, 16> SimplifiedValuesVector; +static SmallVector, 16> SimplifiedValuesVector; static unsigned TripCount = 0; namespace { @@ -38,7 +38,7 @@ struct UnrollAnalyzerTest : public FunctionPass { SimplifiedValuesVector.clear(); TripCount = SE->getSmallConstantTripCount(L, Exiting); for (unsigned Iteration = 0; Iteration < TripCount; Iteration++) { - DenseMap SimplifiedValues; + DenseMap SimplifiedValues; UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, *SE, L); for (auto *BB : L->getBlocks()) for (Instruction &I : *BB)