mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
[TRE] Reland: allow TRE for non-capturing calls.
D82085 ("allow TRE for non-capturing calls") caused a failure during bootstrap. This patch does the same as D82085 and additionally fixes the bootstrap error. The problem with D82085 is that it does not create copies of byval operands when replacing a function call with a branch. Consider the following example:

```
int zoo ( S p1 );

int foo ( int count, S p1 ) {
  if ( count > 10 )
    return zoo(p1);

  // temporary variable created for passing byvalue parameter
  // p1 could be used when zoo(p1) is called (after TRE is done).
  // lifetime.start p1.byvalue.temp
  return foo(count+1, p1);
  // lifetime.end p1.byvalue.temp
}
```

After the recursive call to foo is replaced with a jump to the start of the function, its parameters could be passed to the zoo function, i.e. the temporary variable created for the byvalue parameter "p1" could be passed to zoo. As a result, zoo receives a broken operand:

```
int foo ( int count, S p1 ) {
:tailrecurse
  p1_tr = phi p1, p1.byvalue.temp
  if ( count > 10 )
    return zoo(p1_tr);

  // temporary variable created for passing byvalue parameter
  // p1 could be used when zoo(p1) is called (after TRE is done).
  lifetime.start p1.byvalue.temp
  memcpy (p1.byvalue.temp, p1_tr)
  count = count + 1
  lifetime.end p1.byvalue.temp
  br tailrecurse
}
```

To prevent p1.byvalue.temp from being used after its scope has been ended by the lifetime.end marker, this patch copies the value from p1.byvalue.temp into another temporary variable and then copies that variable into the input parameter for the next iteration. This patch passes a bootstrap build and a bootstrap build with AddressSanitizer. Differential Revision: https://reviews.llvm.org/D85614
This commit is contained in:
parent
afaf301e48
commit
28ca22b845
@ -63,6 +63,7 @@
|
||||
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
|
||||
#include "llvm/Analysis/PostDominators.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/Analysis/ValueTracking.h"
|
||||
#include "llvm/IR/CFG.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
@ -70,6 +71,7 @@
|
||||
#include "llvm/IR/DiagnosticInfo.h"
|
||||
#include "llvm/IR/Dominators.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/InstIterator.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/IntrinsicInst.h"
|
||||
@ -81,6 +83,7 @@
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
||||
#include "llvm/Transforms/Utils/Local.h"
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "tailcallelim"
|
||||
@ -92,10 +95,10 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced");
|
||||
/// Scan the specified function for alloca instructions.
|
||||
/// If it contains any dynamic allocas, returns false.
|
||||
static bool canTRE(Function &F) {
|
||||
// FIXME: The code generator produces really bad code when an 'escaping
|
||||
// alloca' is changed from being a static alloca to being a dynamic alloca.
|
||||
// Until this is resolved, disable this transformation if that would ever
|
||||
// happen. This bug is PR962.
|
||||
// TODO: We don't do TRE if dynamic allocas are used.
|
||||
// Dynamic allocas allocate stack space which should be
|
||||
// deallocated before new iteration started. That is
|
||||
// currently not implemented.
|
||||
return llvm::all_of(instructions(F), [](Instruction &I) {
|
||||
auto *AI = dyn_cast<AllocaInst>(&I);
|
||||
return !AI || AI->isStaticAlloca();
|
||||
@ -188,11 +191,9 @@ struct AllocaDerivedValueTracker {
|
||||
};
|
||||
}
|
||||
|
||||
static bool markTails(Function &F, bool &AllCallsAreTailCalls,
|
||||
OptimizationRemarkEmitter *ORE) {
|
||||
static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) {
|
||||
if (F.callsFunctionThatReturnsTwice())
|
||||
return false;
|
||||
AllCallsAreTailCalls = true;
|
||||
|
||||
// The local stack holds all alloca instructions and all byval arguments.
|
||||
AllocaDerivedValueTracker Tracker;
|
||||
@ -282,11 +283,8 @@ static bool markTails(Function &F, bool &AllCallsAreTailCalls,
|
||||
}
|
||||
}
|
||||
|
||||
if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) {
|
||||
if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI))
|
||||
DeferredTails.push_back(CI);
|
||||
} else {
|
||||
AllCallsAreTailCalls = false;
|
||||
}
|
||||
}
|
||||
|
||||
for (auto *SuccBB : successors(BB)) {
|
||||
@ -323,8 +321,6 @@ static bool markTails(Function &F, bool &AllCallsAreTailCalls,
|
||||
LLVM_DEBUG(dbgs() << "Marked as tail call candidate: " << *CI << "\n");
|
||||
CI->setTailCall();
|
||||
Modified = true;
|
||||
} else {
|
||||
AllCallsAreTailCalls = false;
|
||||
}
|
||||
}
|
||||
|
||||
@ -336,6 +332,14 @@ static bool markTails(Function &F, bool &AllCallsAreTailCalls,
|
||||
/// instructions between the call and this instruction are movable.
|
||||
///
|
||||
static bool canMoveAboveCall(Instruction *I, CallInst *CI, AliasAnalysis *AA) {
|
||||
if (isa<DbgInfoIntrinsic>(I))
|
||||
return true;
|
||||
|
||||
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
|
||||
if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
|
||||
llvm::findAllocaForValue(II->getArgOperand(1)))
|
||||
return true;
|
||||
|
||||
// FIXME: We can move load/store/call/free instructions above the call if the
|
||||
// call does not mod/ref the memory location being processed.
|
||||
if (I->mayHaveSideEffects()) // This also handles volatile loads.
|
||||
@ -402,7 +406,6 @@ class TailRecursionEliminator {
|
||||
// createTailRecurseLoopHeader the first time we find a call we can eliminate.
|
||||
BasicBlock *HeaderBB = nullptr;
|
||||
SmallVector<PHINode *, 8> ArgumentPHIs;
|
||||
bool RemovableCallsMustBeMarkedTail = false;
|
||||
|
||||
// PHI node to store our return value.
|
||||
PHINode *RetPN = nullptr;
|
||||
@ -429,8 +432,7 @@ class TailRecursionEliminator {
|
||||
DomTreeUpdater &DTU)
|
||||
: F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU) {}
|
||||
|
||||
CallInst *findTRECandidate(BasicBlock *BB,
|
||||
bool CannotTailCallElimCallsMarkedTail);
|
||||
CallInst *findTRECandidate(BasicBlock *BB);
|
||||
|
||||
void createTailRecurseLoopHeader(CallInst *CI);
|
||||
|
||||
@ -440,7 +442,11 @@ class TailRecursionEliminator {
|
||||
|
||||
void cleanupAndFinalize();
|
||||
|
||||
bool processBlock(BasicBlock &BB, bool CannotTailCallElimCallsMarkedTail);
|
||||
bool processBlock(BasicBlock &BB);
|
||||
|
||||
void copyByValueOperandIntoLocalTemp(CallInst *CI, int OpndIdx);
|
||||
|
||||
void copyLocalTempOfByValueOperandIntoArguments(CallInst *CI, int OpndIdx);
|
||||
|
||||
public:
|
||||
static bool eliminate(Function &F, const TargetTransformInfo *TTI,
|
||||
@ -449,8 +455,7 @@ public:
|
||||
};
|
||||
} // namespace
|
||||
|
||||
CallInst *TailRecursionEliminator::findTRECandidate(
|
||||
BasicBlock *BB, bool CannotTailCallElimCallsMarkedTail) {
|
||||
CallInst *TailRecursionEliminator::findTRECandidate(BasicBlock *BB) {
|
||||
Instruction *TI = BB->getTerminator();
|
||||
|
||||
if (&BB->front() == TI) // Make sure there is something before the terminator.
|
||||
@ -470,9 +475,9 @@ CallInst *TailRecursionEliminator::findTRECandidate(
|
||||
--BBI;
|
||||
}
|
||||
|
||||
// If this call is marked as a tail call, and if there are dynamic allocas in
|
||||
// the function, we cannot perform this optimization.
|
||||
if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail)
|
||||
assert((!CI->isTailCall() || !CI->isNoTailCall()) &&
|
||||
"Incompatible call site attributes(Tail,NoTail)");
|
||||
if (!CI->isTailCall())
|
||||
return nullptr;
|
||||
|
||||
// As a special case, detect code like this:
|
||||
@ -504,26 +509,13 @@ void TailRecursionEliminator::createTailRecurseLoopHeader(CallInst *CI) {
|
||||
BranchInst *BI = BranchInst::Create(HeaderBB, NewEntry);
|
||||
BI->setDebugLoc(CI->getDebugLoc());
|
||||
|
||||
// If this function has self recursive calls in the tail position where some
|
||||
// are marked tail and some are not, only transform one flavor or another.
|
||||
// We have to choose whether we move allocas in the entry block to the new
|
||||
// entry block or not, so we can't make a good choice for both. We make this
|
||||
// decision here based on whether the first call we found to remove is
|
||||
// marked tail.
|
||||
// NOTE: We could do slightly better here in the case that the function has
|
||||
// no entry block allocas.
|
||||
RemovableCallsMustBeMarkedTail = CI->isTailCall();
|
||||
|
||||
// If this tail call is marked 'tail' and if there are any allocas in the
|
||||
// entry block, move them up to the new entry block.
|
||||
if (RemovableCallsMustBeMarkedTail)
|
||||
// Move all fixed sized allocas from HeaderBB to NewEntry.
|
||||
for (BasicBlock::iterator OEBI = HeaderBB->begin(), E = HeaderBB->end(),
|
||||
NEBI = NewEntry->begin();
|
||||
OEBI != E;)
|
||||
if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++))
|
||||
if (isa<ConstantInt>(AI->getArraySize()))
|
||||
AI->moveBefore(&*NEBI);
|
||||
// Move all fixed sized allocas from HeaderBB to NewEntry.
|
||||
for (BasicBlock::iterator OEBI = HeaderBB->begin(), E = HeaderBB->end(),
|
||||
NEBI = NewEntry->begin();
|
||||
OEBI != E;)
|
||||
if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++))
|
||||
if (isa<ConstantInt>(AI->getArraySize()))
|
||||
AI->moveBefore(&*NEBI);
|
||||
|
||||
// Now that we have created a new block, which jumps to the entry
|
||||
// block, insert a PHI node for each argument of the function.
|
||||
@ -588,6 +580,54 @@ void TailRecursionEliminator::insertAccumulator(Instruction *AccRecInstr) {
|
||||
++NumAccumAdded;
|
||||
}
|
||||
|
||||
// Copies the contents of the ByVal operand at index OpndIdx of the specified
// call instruction into a newly created temporary variable, and rewrites the
// call so that this operand refers to the temporary instead.
//
// CI      - the call whose byval operand is being copied.
// OpndIdx - index of the byval operand within CI's argument list.
//
// The temporary is an alloca inserted at the beginning of F's entry block, so
// the copied value does not depend on the storage the caller originally
// passed as the operand.
|
||||
// NOTE(review): the aggregate type is derived from the pointer's element
// type; CI->getParamByValType(OpndIdx) may be a more direct source - confirm.
|
||||
void TailRecursionEliminator::copyByValueOperandIntoLocalTemp(CallInst *CI,
|
||||
int OpndIdx) {
|
||||
PointerType *ArgTy = cast<PointerType>(CI->getArgOperand(OpndIdx)->getType());
|
||||
Type *AggTy = ArgTy->getElementType();
|
||||
const DataLayout &DL = F.getParent()->getDataLayout();
|
||||
|
||||
// Get alignment of byVal operand.
// valueOrOne() yields an alignment of 1 when the call site specifies none.
|
||||
Align Alignment(CI->getParamAlign(OpndIdx).valueOrOne());
|
||||
|
||||
// Create alloca for the temporary copy of the byval operand.
|
||||
// Put alloca into the entry block.
// The alloca is given the same name as the operand it shadows.
|
||||
Value *NewAlloca = new AllocaInst(
|
||||
AggTy, DL.getAllocaAddrSpace(), nullptr, Alignment,
|
||||
CI->getArgOperand(OpndIdx)->getName(), &*F.getEntryBlock().begin());
|
||||
|
||||
// The builder is positioned at CI, so the memcpy is emitted next to the call.
IRBuilder<> Builder(CI);
|
||||
// Number of bytes to copy: the allocation size of the aggregate, as an i64.
Value *Size = Builder.getInt64(DL.getTypeAllocSize(AggTy));
|
||||
|
||||
// Copy data from byvalue operand into the temporary variable.
|
||||
Builder.CreateMemCpy(NewAlloca, /*DstAlign*/ Alignment,
|
||||
CI->getArgOperand(OpndIdx),
|
||||
/*SrcAlign*/ Alignment, Size);
|
||||
// From here on the call's operand is the temporary, not the original value.
CI->setArgOperand(OpndIdx, NewAlloca);
|
||||
}
|
||||
|
||||
// Copies the value held by operand OpndIdx of the specified call instruction
// (a temporary variable keeping the value of the ByVal argument) into the
// corresponding function argument location, F.getArg(OpndIdx).
//
// CI      - the call whose operand is the source of the copy.
// OpndIdx - index of the operand in CI and of the destination argument in F.
|
||||
// NOTE(review): presumably called only after copyByValueOperandIntoLocalTemp
// has replaced the operand with a local temporary - confirm against callers.
|
||||
void TailRecursionEliminator::copyLocalTempOfByValueOperandIntoArguments(
|
||||
CallInst *CI, int OpndIdx) {
|
||||
PointerType *ArgTy = cast<PointerType>(CI->getArgOperand(OpndIdx)->getType());
|
||||
Type *AggTy = ArgTy->getElementType();
|
||||
const DataLayout &DL = F.getParent()->getDataLayout();
|
||||
|
||||
// Get alignment of byVal operand.
// NOTE(review): the call-site alignment is used for both source and
// destination; assumes F's own argument has the same alignment - confirm.
|
||||
Align Alignment(CI->getParamAlign(OpndIdx).valueOrOne());
|
||||
|
||||
// The builder is positioned at CI, so the memcpy is emitted next to the call.
IRBuilder<> Builder(CI);
|
||||
// Number of bytes to copy: the allocation size of the aggregate, as an i64.
Value *Size = Builder.getInt64(DL.getTypeAllocSize(AggTy));
|
||||
|
||||
// Copy data from the temporary variable into the corresponding
|
||||
// function argument location.
|
||||
Builder.CreateMemCpy(F.getArg(OpndIdx), /*DstAlign*/ Alignment,
|
||||
CI->getArgOperand(OpndIdx),
|
||||
/*SrcAlign*/ Alignment, Size);
|
||||
}
|
||||
|
||||
bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
|
||||
ReturnInst *Ret = cast<ReturnInst>(CI->getParent()->getTerminator());
|
||||
|
||||
@ -626,14 +666,22 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
|
||||
if (!HeaderBB)
|
||||
createTailRecurseLoopHeader(CI);
|
||||
|
||||
if (RemovableCallsMustBeMarkedTail && !CI->isTailCall())
|
||||
return false;
|
||||
// Copy values of ByVal operands into local temporarily variables.
|
||||
for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I) {
|
||||
if (CI->isByValArgument(I))
|
||||
copyByValueOperandIntoLocalTemp(CI, I);
|
||||
}
|
||||
|
||||
// Ok, now that we know we have a pseudo-entry block WITH all of the
|
||||
// required PHI nodes, add entries into the PHI node for the actual
|
||||
// parameters passed into the tail-recursive call.
|
||||
for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
|
||||
ArgumentPHIs[i]->addIncoming(CI->getArgOperand(i), BB);
|
||||
for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I) {
|
||||
if (CI->isByValArgument(I)) {
|
||||
copyLocalTempOfByValueOperandIntoArguments(CI, I);
|
||||
ArgumentPHIs[I]->addIncoming(F.getArg(I), BB);
|
||||
} else
|
||||
ArgumentPHIs[I]->addIncoming(CI->getArgOperand(I), BB);
|
||||
}
|
||||
|
||||
if (AccRecInstr) {
|
||||
insertAccumulator(AccRecInstr);
|
||||
@ -750,8 +798,7 @@ void TailRecursionEliminator::cleanupAndFinalize() {
|
||||
}
|
||||
}
|
||||
|
||||
bool TailRecursionEliminator::processBlock(
|
||||
BasicBlock &BB, bool CannotTailCallElimCallsMarkedTail) {
|
||||
bool TailRecursionEliminator::processBlock(BasicBlock &BB) {
|
||||
Instruction *TI = BB.getTerminator();
|
||||
|
||||
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
|
||||
@ -764,7 +811,7 @@ bool TailRecursionEliminator::processBlock(
|
||||
if (!Ret)
|
||||
return false;
|
||||
|
||||
CallInst *CI = findTRECandidate(&BB, CannotTailCallElimCallsMarkedTail);
|
||||
CallInst *CI = findTRECandidate(&BB);
|
||||
|
||||
if (!CI)
|
||||
return false;
|
||||
@ -785,7 +832,7 @@ bool TailRecursionEliminator::processBlock(
|
||||
eliminateCall(CI);
|
||||
return true;
|
||||
} else if (isa<ReturnInst>(TI)) {
|
||||
CallInst *CI = findTRECandidate(&BB, CannotTailCallElimCallsMarkedTail);
|
||||
CallInst *CI = findTRECandidate(&BB);
|
||||
|
||||
if (CI)
|
||||
return eliminateCall(CI);
|
||||
@ -803,26 +850,21 @@ bool TailRecursionEliminator::eliminate(Function &F,
|
||||
return false;
|
||||
|
||||
bool MadeChange = false;
|
||||
bool AllCallsAreTailCalls = false;
|
||||
MadeChange |= markTails(F, AllCallsAreTailCalls, ORE);
|
||||
if (!AllCallsAreTailCalls)
|
||||
return MadeChange;
|
||||
MadeChange |= markTails(F, ORE);
|
||||
|
||||
// If this function is a varargs function, we won't be able to PHI the args
|
||||
// right, so don't even try to convert it...
|
||||
if (F.getFunctionType()->isVarArg())
|
||||
return MadeChange;
|
||||
|
||||
// If false, we cannot perform TRE on tail calls marked with the 'tail'
|
||||
// attribute, because doing so would cause the stack size to increase (real
|
||||
// TRE would deallocate variable sized allocas, TRE doesn't).
|
||||
bool CanTRETailMarkedCall = canTRE(F);
|
||||
if (!canTRE(F))
|
||||
return MadeChange;
|
||||
|
||||
// Change any tail recursive calls to loops.
|
||||
TailRecursionEliminator TRE(F, TTI, AA, ORE, DTU);
|
||||
|
||||
for (BasicBlock &BB : F)
|
||||
MadeChange |= TRE.processBlock(BB, !CanTRETailMarkedCall);
|
||||
MadeChange |= TRE.processBlock(BB);
|
||||
|
||||
TRE.cleanupAndFinalize();
|
||||
|
||||
|
@ -12,15 +12,16 @@ define void @test0() {
|
||||
ret void
|
||||
}
|
||||
|
||||
; PR615. Make sure that we do not move the alloca so that it interferes with the tail call.
|
||||
; Make sure that we do not do TRE if pointer to local stack
|
||||
; escapes through function call.
|
||||
define i32 @test1() {
|
||||
; CHECK: i32 @test1()
|
||||
; CHECK-NEXT: alloca
|
||||
%A = alloca i32 ; <i32*> [#uses=2]
|
||||
store i32 5, i32* %A
|
||||
call void @use(i32* %A)
|
||||
; CHECK: tail call i32 @test1
|
||||
%X = tail call i32 @test1() ; <i32> [#uses=1]
|
||||
; CHECK: call i32 @test1
|
||||
%X = call i32 @test1() ; <i32> [#uses=1]
|
||||
ret i32 %X
|
||||
}
|
||||
|
||||
|
144
test/Transforms/TailCallElim/tre-byval-parameter-2.ll
Normal file
144
test/Transforms/TailCallElim/tre-byval-parameter-2.ll
Normal file
@ -0,0 +1,144 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s
|
||||
|
||||
; the test was generated from the following C++ source:
|
||||
;
|
||||
; #include <stdio.h>
|
||||
; typedef struct A { long long x[10] = {0}; } A;
|
||||
; A global;
|
||||
; void dostuff(A a, A b, int i) {
|
||||
; if (i==10) return;
|
||||
; a.x[5]++;
|
||||
; printf("%lld %lld\n", a.x[5], b.x[5]); dostuff(b, a, i+1);
|
||||
; }
|
||||
; __attribute((optnone)) int main() { dostuff(global, global, 0); }
|
||||
;
|
||||
; This test checks that values for two ByValue operands are copied
|
||||
; into temporary variables first and then the temporary
|
||||
; variables are copied into original function arguments location.
|
||||
|
||||
%struct.A = type { [10 x i64] }
|
||||
|
||||
@global = dso_local local_unnamed_addr global %struct.A zeroinitializer, align 8
|
||||
@.str = private unnamed_addr constant [11 x i8] c"%lld %lld\0A\00", align 1
|
||||
|
||||
; Function Attrs: noinline nounwind uwtable
|
||||
define dso_local void @_Z7dostuff1AS_i(%struct.A* nocapture byval(%struct.A) align 8 %a, %struct.A* nocapture readonly byval(%struct.A) align 8 %b, i32 %i) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: @_Z7dostuff1AS_i(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[AGG_TMP52:%.*]] = alloca [[STRUCT_A:%.*]], align 8
|
||||
; CHECK-NEXT: [[AGG_TMP1:%.*]] = alloca [[STRUCT_A]], align 8
|
||||
; CHECK-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_A]], align 8
|
||||
; CHECK-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_A]], align 8
|
||||
; CHECK-NEXT: br label [[TAILRECURSE:%.*]]
|
||||
; CHECK: tailrecurse:
|
||||
; CHECK-NEXT: [[I_TR:%.*]] = phi i32 [ [[I:%.*]], [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[IF_END:%.*]] ]
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_TR]], 10
|
||||
; CHECK-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[IF_END]]
|
||||
; CHECK: if.end:
|
||||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_A]], %struct.A* [[A:%.*]], i64 0, i32 0, i64 5
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[ARRAYIDX]], align 8
|
||||
; CHECK-NEXT: [[INC:%.*]] = add nsw i64 [[TMP0]], 1
|
||||
; CHECK-NEXT: store i64 [[INC]], i64* [[ARRAYIDX]], align 8
|
||||
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [[STRUCT_A]], %struct.A* [[B:%.*]], i64 0, i32 0, i64 5
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[ARRAYIDX4]], align 8
|
||||
; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), i64 [[INC]], i64 [[TMP1]])
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = bitcast %struct.A* [[AGG_TMP]] to i8*
|
||||
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 80, i8* nonnull [[TMP2]])
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = bitcast %struct.A* [[B]] to i8*
|
||||
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(80) [[TMP2]], i8* nonnull align 8 dereferenceable(80) [[TMP3]], i64 80, i1 false)
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast %struct.A* [[AGG_TMP5]] to i8*
|
||||
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 80, i8* nonnull [[TMP4]])
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.A* [[A]] to i8*
|
||||
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(80) [[TMP4]], i8* nonnull align 8 dereferenceable(80) [[TMP5]], i64 80, i1 false)
|
||||
; CHECK-NEXT: [[ADD]] = add nsw i32 [[I_TR]], 1
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = bitcast %struct.A* [[AGG_TMP1]] to i8*
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = bitcast %struct.A* [[AGG_TMP]] to i8*
|
||||
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 [[TMP7]], i64 80, i1 false)
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = bitcast %struct.A* [[AGG_TMP52]] to i8*
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = bitcast %struct.A* [[AGG_TMP5]] to i8*
|
||||
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP8]], i8* align 8 [[TMP9]], i64 80, i1 false)
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = bitcast %struct.A* [[A]] to i8*
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = bitcast %struct.A* [[AGG_TMP1]] to i8*
|
||||
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 [[TMP11]], i64 80, i1 false)
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = bitcast %struct.A* [[B]] to i8*
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = bitcast %struct.A* [[AGG_TMP52]] to i8*
|
||||
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP12]], i8* align 8 [[TMP13]], i64 80, i1 false)
|
||||
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 80, i8* nonnull [[TMP2]])
|
||||
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 80, i8* nonnull [[TMP4]])
|
||||
; CHECK-NEXT: br label [[TAILRECURSE]]
|
||||
; CHECK: return:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%agg.tmp = alloca %struct.A, align 8
|
||||
%agg.tmp5 = alloca %struct.A, align 8
|
||||
%cmp = icmp eq i32 %i, 10
|
||||
br i1 %cmp, label %return, label %if.end
|
||||
|
||||
if.end: ; preds = %entry
|
||||
%arrayidx = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 0, i64 5
|
||||
%0 = load i64, i64* %arrayidx, align 8
|
||||
%inc = add nsw i64 %0, 1
|
||||
store i64 %inc, i64* %arrayidx, align 8
|
||||
%arrayidx4 = getelementptr inbounds %struct.A, %struct.A* %b, i64 0, i32 0, i64 5
|
||||
%1 = load i64, i64* %arrayidx4, align 8
|
||||
%call = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([11 x i8], [11 x i8]* @.str
|
||||
, i64 0, i64 0), i64 %inc, i64 %1)
|
||||
%2 = bitcast %struct.A* %agg.tmp to i8*
|
||||
call void @llvm.lifetime.start.p0i8(i64 80, i8* nonnull %2)
|
||||
%3 = bitcast %struct.A* %b to i8*
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(80) %2, i8* nonnull align 8 dereferenceable(80) %3, i64 80, i1 false)
|
||||
%4 = bitcast %struct.A* %agg.tmp5 to i8*
|
||||
call void @llvm.lifetime.start.p0i8(i64 80, i8* nonnull %4)
|
||||
%5 = bitcast %struct.A* %a to i8*
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(80) %4, i8* nonnull align 8 dereferenceable(80) %5, i64 80, i1 false)
|
||||
%add = add nsw i32 %i, 1
|
||||
call void @_Z7dostuff1AS_i(%struct.A* nonnull byval(%struct.A) align 8 %agg.tmp, %struct.A* nonnull byval(%struct.A) align 8 %agg.tmp5, i32 %add)
|
||||
call void @llvm.lifetime.end.p0i8(i64 80, i8* nonnull %2)
|
||||
call void @llvm.lifetime.end.p0i8(i64 80, i8* nonnull %4)
|
||||
br label %return
|
||||
|
||||
return: ; preds = %entry, %if.end
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nofree nounwind
|
||||
declare dso_local noundef i32 @printf(i8* nocapture noundef readonly, ...) local_unnamed_addr #1
|
||||
|
||||
; Function Attrs: argmemonly nounwind willreturn
|
||||
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #2
|
||||
|
||||
; Function Attrs: argmemonly nounwind willreturn
|
||||
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
|
||||
|
||||
; Function Attrs: argmemonly nounwind willreturn
|
||||
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
|
||||
|
||||
; Function Attrs: noinline norecurse nounwind optnone uwtable
|
||||
define dso_local i32 @main() local_unnamed_addr #3 {
|
||||
; CHECK-LABEL: @main(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_A:%.*]], align 8
|
||||
; CHECK-NEXT: [[AGG_TMP1:%.*]] = alloca [[STRUCT_A]], align 8
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.A* [[AGG_TMP]] to i8*
|
||||
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 bitcast (%struct.A* @global to i8*), i64 80, i1 false)
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast %struct.A* [[AGG_TMP1]] to i8*
|
||||
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP1]], i8* align 8 bitcast (%struct.A* @global to i8*), i64 80, i1 false)
|
||||
; CHECK-NEXT: tail call void @_Z7dostuff1AS_i(%struct.A* byval(%struct.A) align 8 [[AGG_TMP]], %struct.A* byval(%struct.A) align 8 [[AGG_TMP1]], i32 0)
|
||||
; CHECK-NEXT: ret i32 0
|
||||
;
|
||||
entry:
|
||||
%agg.tmp = alloca %struct.A, align 8
|
||||
%agg.tmp1 = alloca %struct.A, align 8
|
||||
%0 = bitcast %struct.A* %agg.tmp to i8*
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %0, i8* align 8 bitcast (%struct.A* @global to i8*), i64 80, i1 false)
|
||||
%1 = bitcast %struct.A* %agg.tmp1 to i8*
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %1, i8* align 8 bitcast (%struct.A* @global to i8*), i64 80, i1 false)
|
||||
call void @_Z7dostuff1AS_i(%struct.A* byval(%struct.A) align 8 %agg.tmp, %struct.A* byval(%struct.A) align 8 %agg.tmp1, i32 0)
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
attributes #0 = { uwtable }
|
||||
attributes #1 = { uwtable }
|
||||
attributes #2 = { argmemonly nounwind willreturn }
|
117
test/Transforms/TailCallElim/tre-byval-parameter.ll
Normal file
117
test/Transforms/TailCallElim/tre-byval-parameter.ll
Normal file
@ -0,0 +1,117 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s
|
||||
|
||||
; the test was generated from the following C++ source:
|
||||
;
|
||||
; int zoo ( S p1 );
|
||||
;
|
||||
; int foo ( int count, S p1 ) {
|
||||
; if ( count > 10 )
|
||||
; return zoo(p1);
|
||||
;
|
||||
; // After TRE: temporary variable created for passing byvalue parameter
|
||||
; // p1 could be used when zoo(p1) is called.
|
||||
; return foo(count+1, p1);
|
||||
; }
|
||||
|
||||
; this test checks that value of ByValue operand AGG_TMP_I of call site foo()
|
||||
; is copied into temporary variable AGG_TMP_I1 (byVal value holder) and
|
||||
; later the value from AGG_TMP_I1 is copied into function argument P1 before
|
||||
; new iteration started.
|
||||
|
||||
%struct.S = type { i32, i32, float, %struct.B }
|
||||
%struct.B = type { i32, float }
|
||||
|
||||
; Function Attrs: uwtable
|
||||
define dso_local i32 @_Z3fooi1S(i32 %count, %struct.S* nocapture readonly byval(%struct.S) align 8 %p1) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: @_Z3fooi1S(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[AGG_TMP_I1:%.*]] = alloca [[STRUCT_S:%.*]], align 8
|
||||
; CHECK-NEXT: [[AGG_TMP_I:%.*]] = alloca [[STRUCT_S]], align 8
|
||||
; CHECK-NEXT: [[AGG_TMP14:%.*]] = alloca [[STRUCT_S]], align 8
|
||||
; CHECK-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_S]], align 8
|
||||
; CHECK-NEXT: [[AGG_TMP1:%.*]] = alloca [[STRUCT_S]], align 8
|
||||
; CHECK-NEXT: br label [[TAILRECURSE:%.*]]
|
||||
; CHECK: tailrecurse:
|
||||
; CHECK-NEXT: [[COUNT_TR:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[IF_END:%.*]] ]
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[COUNT_TR]], 10
|
||||
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END]]
|
||||
; CHECK: if.then:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.S* [[AGG_TMP]] to i8*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast %struct.S* [[P1:%.*]] to i8*
|
||||
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(20) [[TMP0]], i8* nonnull align 8 dereferenceable(20) [[TMP1]], i64 20, i1 false)
|
||||
; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @_Z3zoo1S(%struct.S* nonnull byval(%struct.S) align 8 [[AGG_TMP]])
|
||||
; CHECK-NEXT: br label [[RETURN:%.*]]
|
||||
; CHECK: if.end:
|
||||
; CHECK-NEXT: [[ADD]] = add nsw i32 [[COUNT_TR]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = bitcast %struct.S* [[AGG_TMP1]] to i8*
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = bitcast %struct.S* [[P1]] to i8*
|
||||
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(20) [[TMP2]], i8* nonnull align 8 dereferenceable(20) [[TMP3]], i64 20, i1 false)
|
||||
; CHECK-NEXT: [[AGG_TMP14_0__SROA_CAST:%.*]] = bitcast %struct.S* [[AGG_TMP14]] to i8*
|
||||
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 20, i8* nonnull [[AGG_TMP14_0__SROA_CAST]])
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast %struct.S* [[AGG_TMP_I]] to i8*
|
||||
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 20, i8* nonnull [[TMP4]])
|
||||
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(20) [[AGG_TMP14_0__SROA_CAST]], i8* nonnull align 8 dereferenceable(20) [[TMP2]], i64 20, i1 false)
|
||||
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(20) [[TMP4]], i8* nonnull align 8 dereferenceable(20) [[AGG_TMP14_0__SROA_CAST]], i64 20, i1 false)
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.S* [[AGG_TMP_I1]] to i8*
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = bitcast %struct.S* [[AGG_TMP_I]] to i8*
|
||||
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 20, i1 false)
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = bitcast %struct.S* [[P1]] to i8*
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = bitcast %struct.S* [[AGG_TMP_I1]] to i8*
|
||||
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP7]], i8* align 8 [[TMP8]], i64 20, i1 false)
|
||||
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 20, i8* nonnull [[AGG_TMP14_0__SROA_CAST]])
|
||||
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 20, i8* nonnull [[TMP4]])
|
||||
; CHECK-NEXT: br label [[TAILRECURSE]]
|
||||
; CHECK: return:
|
||||
; CHECK-NEXT: ret i32 [[CALL]]
|
||||
;
|
||||
entry:
|
||||
%agg.tmp.i = alloca %struct.S, align 8
|
||||
%agg.tmp14 = alloca %struct.S, align 8
|
||||
%agg.tmp = alloca %struct.S, align 8
|
||||
%agg.tmp1 = alloca %struct.S, align 8
|
||||
%cmp = icmp sgt i32 %count, 10
|
||||
br i1 %cmp, label %if.then, label %if.end
|
||||
|
||||
if.then: ; preds = %entry
|
||||
%0 = bitcast %struct.S* %agg.tmp to i8*
|
||||
%1 = bitcast %struct.S* %p1 to i8*
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(20) %0, i8* nonnull align 8 dereferenceable(20) %1, i64 20, i1 false)
|
||||
%call = call i32 @_Z3zoo1S(%struct.S* nonnull byval(%struct.S) align 8 %agg.tmp)
|
||||
br label %return
|
||||
|
||||
if.end: ; preds = %entry
|
||||
%add = add nsw i32 %count, 1
|
||||
%2 = bitcast %struct.S* %agg.tmp1 to i8*
|
||||
%3 = bitcast %struct.S* %p1 to i8*
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(20) %2, i8* nonnull align 8 dereferenceable(20) %3, i64 20, i1 false)
|
||||
%agg.tmp14.0..sroa_cast = bitcast %struct.S* %agg.tmp14 to i8*
|
||||
call void @llvm.lifetime.start.p0i8(i64 20, i8* nonnull %agg.tmp14.0..sroa_cast)
|
||||
%4 = bitcast %struct.S* %agg.tmp.i to i8*
|
||||
call void @llvm.lifetime.start.p0i8(i64 20, i8* nonnull %4)
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(20) %agg.tmp14.0..sroa_cast, i8* nonnull align 8 dereferenceable(20) %2, i64 20, i1 false)
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(20) %4, i8* nonnull align 8 dereferenceable(20) %agg.tmp14.0..sroa_cast, i64 20, i1 false)
|
||||
%call.i = call i32 @_Z3fooi1S(i32 %add, %struct.S* nonnull byval(%struct.S) align 8 %agg.tmp.i)
|
||||
call void @llvm.lifetime.end.p0i8(i64 20, i8* nonnull %agg.tmp14.0..sroa_cast)
|
||||
call void @llvm.lifetime.end.p0i8(i64 20, i8* nonnull %4)
|
||||
br label %return
|
||||
|
||||
return: ; preds = %if.end, %if.then
|
||||
%retval.0 = phi i32 [ %call, %if.then ], [ %call.i, %if.end ]
|
||||
ret i32 %retval.0
|
||||
}
|
||||
|
||||
declare dso_local i32 @_Z3zoo1S(%struct.S* byval(%struct.S) align 8) local_unnamed_addr #1
|
||||
|
||||
; Function Attrs: argmemonly nounwind willreturn
|
||||
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
|
||||
|
||||
; Function Attrs: argmemonly nounwind willreturn
|
||||
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
|
||||
|
||||
; Function Attrs: argmemonly nounwind willreturn
|
||||
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #2
|
||||
|
||||
attributes #0 = { uwtable }
|
||||
attributes #1 = { uwtable }
|
||||
attributes #2 = { argmemonly nounwind willreturn }
|
125
test/Transforms/TailCallElim/tre-multiple-exits.ll
Normal file
125
test/Transforms/TailCallElim/tre-multiple-exits.ll
Normal file
@ -0,0 +1,125 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s
|
||||
|
||||
; This test checks that TRE would be done for only one recursive call.
|
||||
; The test_multiple_exits function has three recursive calls.
|
||||
; First recursive call could not be eliminated because there is
|
||||
; escaped pointer to local variable. Second recursive call could
|
||||
; be eliminated. Third recursive call could not be eliminated since
|
||||
; it is not the last call. Thus, the test checks that TRE is done
|
||||
; for only the second recursive call.
|
||||
|
||||
; IR for that test was generated from the following C++ source:
|
||||
;
|
||||
; void capture_arg (int*);
|
||||
; void test_multiple_exits (int param) {
|
||||
; if (param >= 0 && param < 10) {
|
||||
; int temp;
|
||||
; capture_arg(&temp);
|
||||
; // TRE could not be done because pointer to local
|
||||
; // variable "temp" is escaped.
|
||||
; test_multiple_exits(param + 1);
|
||||
; } else if (param >=10 && param < 20) {
|
||||
; // TRE should be done.
|
||||
; test_multiple_exits(param + 1);
|
||||
; } else if (param >= 20 && param < 22) {
|
||||
; // TRE could not be done since recursive
|
||||
; // call is not last call.
|
||||
; test_multiple_exits(param + 1);
|
||||
; func();
|
||||
; }
|
||||
;
|
||||
; return;
|
||||
; }
|
||||
|
||||
; Function Attrs: nofree noinline norecurse nounwind uwtable
|
||||
declare void @_Z11capture_argPi(i32* %param) #0
|
||||
|
||||
; Function Attrs: nofree noinline norecurse nounwind uwtable
|
||||
declare void @_Z4funcv() #0
|
||||
|
||||
; Function Attrs: noinline nounwind uwtable
|
||||
define dso_local void @_Z19test_multiple_exitsi(i32 %param) local_unnamed_addr #2 {
|
||||
; CHECK-LABEL: @_Z19test_multiple_exitsi(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[TEMP:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: br label [[TAILRECURSE:%.*]]
|
||||
; CHECK: tailrecurse:
|
||||
; CHECK-NEXT: [[PARAM_TR:%.*]] = phi i32 [ [[PARAM:%.*]], [[ENTRY:%.*]] ], [ [[ADD6:%.*]], [[IF_THEN5:%.*]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[PARAM_TR]], 10
|
||||
; CHECK-NEXT: br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
|
||||
; CHECK: if.then:
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TEMP]] to i8*
|
||||
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull [[TMP1]]) #1
|
||||
; CHECK-NEXT: call void @_Z11capture_argPi(i32* nonnull [[TEMP]])
|
||||
; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[PARAM_TR]], 1
|
||||
; CHECK-NEXT: call void @_Z19test_multiple_exitsi(i32 [[ADD]])
|
||||
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull [[TMP1]]) #1
|
||||
; CHECK-NEXT: br label [[IF_END14:%.*]]
|
||||
; CHECK: if.else:
|
||||
; CHECK-NEXT: [[PARAM_OFF:%.*]] = add i32 [[PARAM_TR]], -10
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[PARAM_OFF]], 10
|
||||
; CHECK-NEXT: br i1 [[TMP2]], label [[IF_THEN5]], label [[IF_ELSE7:%.*]]
|
||||
; CHECK: if.then5:
|
||||
; CHECK-NEXT: [[ADD6]] = add nuw nsw i32 [[PARAM_TR]], 1
|
||||
; CHECK-NEXT: br label [[TAILRECURSE]]
|
||||
; CHECK: if.else7:
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[PARAM_TR]], -2
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 20
|
||||
; CHECK-NEXT: br i1 [[TMP4]], label [[IF_THEN11:%.*]], label [[IF_END14]]
|
||||
; CHECK: if.then11:
|
||||
; CHECK-NEXT: [[ADD12:%.*]] = add nsw i32 [[PARAM_TR]], 1
|
||||
; CHECK-NEXT: tail call void @_Z19test_multiple_exitsi(i32 [[ADD12]])
|
||||
; CHECK-NEXT: tail call void @_Z4funcv()
|
||||
; CHECK-NEXT: ret void
|
||||
; CHECK: if.end14:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%temp = alloca i32, align 4
|
||||
%0 = icmp ult i32 %param, 10
|
||||
br i1 %0, label %if.then, label %if.else
|
||||
|
||||
if.then: ; preds = %entry
|
||||
%1 = bitcast i32* %temp to i8*
|
||||
call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #2
|
||||
call void @_Z11capture_argPi(i32* nonnull %temp)
|
||||
%add = add nuw nsw i32 %param, 1
|
||||
call void @_Z19test_multiple_exitsi(i32 %add)
|
||||
call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #2
|
||||
br label %if.end14
|
||||
|
||||
if.else: ; preds = %entry
|
||||
%param.off = add i32 %param, -10
|
||||
%2 = icmp ult i32 %param.off, 10
|
||||
br i1 %2, label %if.then5, label %if.else7
|
||||
|
||||
if.then5: ; preds = %if.else
|
||||
%add6 = add nuw nsw i32 %param, 1
|
||||
call void @_Z19test_multiple_exitsi(i32 %add6)
|
||||
br label %if.end14
|
||||
|
||||
if.else7: ; preds = %if.else
|
||||
%3 = and i32 %param, -2
|
||||
%4 = icmp eq i32 %3, 20
|
||||
br i1 %4, label %if.then11, label %if.end14
|
||||
|
||||
if.then11: ; preds = %if.else7
|
||||
%add12 = add nsw i32 %param, 1
|
||||
call void @_Z19test_multiple_exitsi(i32 %add12)
|
||||
call void @_Z4funcv()
|
||||
br label %if.end14
|
||||
|
||||
if.end14: ; preds = %if.then5, %if.then11, %if.else7, %if.then
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: argmemonly nounwind willreturn
|
||||
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
|
||||
|
||||
; Function Attrs: argmemonly nounwind willreturn
|
||||
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
|
||||
|
||||
attributes #0 = { nofree noinline norecurse nounwind uwtable }
|
||||
attributes #1 = { nounwind uwtable }
|
||||
attributes #2 = { argmemonly nounwind willreturn }
|
@ -0,0 +1,74 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s
|
||||
|
||||
; IR for that test was generated from the following C++ source:
|
||||
;
|
||||
;int count;
|
||||
;__attribute__((noinline)) void globalIncrement(const int* param) { count += *param; }
|
||||
;
|
||||
;void test(int recurseCount)
|
||||
;{
|
||||
; if (recurseCount == 0) return;
|
||||
; int temp = 10;
|
||||
; globalIncrement(&temp);
|
||||
; test(recurseCount - 1);
|
||||
;}
|
||||
;
|
||||
|
||||
@count = dso_local local_unnamed_addr global i32 0, align 4
|
||||
|
||||
; Function Attrs: nofree noinline norecurse nounwind uwtable
|
||||
declare void @_Z15globalIncrementPKi(i32* nocapture readonly %param) #0
|
||||
|
||||
; Test that TRE could be done for recursive tail routine containing
|
||||
; a call to a function that receives a pointer to a local stack variable.
|
||||
|
||||
; Function Attrs: nounwind uwtable
|
||||
define dso_local void @_Z4testi(i32 %recurseCount) local_unnamed_addr #1 {
|
||||
; CHECK-LABEL: @_Z4testi(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[TEMP:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: br label [[TAILRECURSE:%.*]]
|
||||
; CHECK: tailrecurse:
|
||||
; CHECK-NEXT: [[RECURSECOUNT_TR:%.*]] = phi i32 [ [[RECURSECOUNT:%.*]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[IF_END:%.*]] ]
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[RECURSECOUNT_TR]], 0
|
||||
; CHECK-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[IF_END]]
|
||||
; CHECK: if.end:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[TEMP]] to i8*
|
||||
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull [[TMP0]])
|
||||
; CHECK-NEXT: store i32 10, i32* [[TEMP]], align 4
|
||||
; CHECK-NEXT: call void @_Z15globalIncrementPKi(i32* nonnull [[TEMP]])
|
||||
; CHECK-NEXT: [[SUB]] = add nsw i32 [[RECURSECOUNT_TR]], -1
|
||||
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull [[TMP0]])
|
||||
; CHECK-NEXT: br label [[TAILRECURSE]]
|
||||
; CHECK: return:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%temp = alloca i32, align 4
|
||||
%cmp = icmp eq i32 %recurseCount, 0
|
||||
br i1 %cmp, label %return, label %if.end
|
||||
|
||||
if.end: ; preds = %entry
|
||||
%0 = bitcast i32* %temp to i8*
|
||||
call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #6
|
||||
store i32 10, i32* %temp, align 4
|
||||
call void @_Z15globalIncrementPKi(i32* nonnull %temp)
|
||||
%sub = add nsw i32 %recurseCount, -1
|
||||
call void @_Z4testi(i32 %sub)
|
||||
call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #6
|
||||
br label %return
|
||||
|
||||
return: ; preds = %entry, %if.end
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: argmemonly nounwind willreturn
|
||||
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
|
||||
|
||||
; Function Attrs: argmemonly nounwind willreturn
|
||||
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
|
||||
|
||||
attributes #0 = { nofree noinline norecurse nounwind uwtable }
|
||||
attributes #1 = { nounwind uwtable }
|
||||
attributes #2 = { argmemonly nounwind willreturn }
|
Loading…
Reference in New Issue
Block a user