From f318924b28fa2b2bee1c173e86424daff59fce34 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Fri, 27 Jul 2012 21:21:26 +0000 Subject: [PATCH] Teach CodeGenPrep to look past bitcast when it's duplicating return instruction into predecessor blocks to enable tail call optimization. rdar://11958338 llvm-svn: 160894 --- lib/Transforms/Scalar/CodeGenPrepare.cpp | 17 ++++- lib/Transforms/Utils/BasicBlockUtils.cpp | 24 +++++-- test/CodeGen/X86/tailcall-cgp-dup.ll | 87 ++++++++++++++++++++++++ 3 files changed, 121 insertions(+), 7 deletions(-) create mode 100644 test/CodeGen/X86/tailcall-cgp-dup.ll diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index f425cd03be6..4b4a8c598fc 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -645,10 +645,18 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) { if (!TLI) return false; + PHINode *PN = 0; + BitCastInst *BCI = 0; Value *V = RI->getReturnValue(); - PHINode *PN = V ? dyn_cast<PHINode>(V) : NULL; - if (V && !PN) - return false; + if (V) { + BCI = dyn_cast<BitCastInst>(V); + if (BCI) + V = BCI->getOperand(0); + + PN = dyn_cast<PHINode>(V); + if (!PN) + return false; + } BasicBlock *BB = RI->getParent(); if (PN && PN->getParent() != BB) @@ -666,6 +674,9 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) { if (PN) { BasicBlock::iterator BI = BB->begin(); do { ++BI; } while (isa<DbgInfoIntrinsic>(BI)); + if (&*BI == BCI) + // Also skip over the bitcast. 
+ ++BI; if (&*BI != RI) return false; } else { diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 55764321491..2679b933f6b 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -659,10 +659,26 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, // If the return instruction returns a value, and if the value was a // PHI node in "BB", propagate the right value into the return. for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end(); - i != e; ++i) - if (PHINode *PN = dyn_cast<PHINode>(*i)) - if (PN->getParent() == BB) - *i = PN->getIncomingValueForBlock(Pred); + i != e; ++i) { + Value *V = *i; + Instruction *NewBC = 0; + if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) { + // Return value might be bitcasted. Clone and insert it before the + // return instruction. + V = BCI->getOperand(0); + NewBC = BCI->clone(); + Pred->getInstList().insert(NewRet, NewBC); + *i = NewBC; + } + if (PHINode *PN = dyn_cast<PHINode>(V)) { + if (PN->getParent() == BB) { + if (NewBC) + NewBC->setOperand(0, PN->getIncomingValueForBlock(Pred)); + else + *i = PN->getIncomingValueForBlock(Pred); + } + } + } // Update any PHI nodes in the returning block to realize that we no // longer branch to them. diff --git a/test/CodeGen/X86/tailcall-cgp-dup.ll b/test/CodeGen/X86/tailcall-cgp-dup.ll new file mode 100644 index 00000000000..a80b90f9eee --- /dev/null +++ b/test/CodeGen/X86/tailcall-cgp-dup.ll @@ -0,0 +1,87 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s + +; Teach CGP to dup returns to enable tail call optimization. 
+; rdar://9147433 + +define i32 @foo(i32 %x) nounwind ssp { +; CHECK: foo: +entry: + switch i32 %x, label %return [ + i32 1, label %sw.bb + i32 2, label %sw.bb1 + i32 3, label %sw.bb3 + i32 4, label %sw.bb5 + i32 5, label %sw.bb7 + i32 6, label %sw.bb9 + ] + +sw.bb: ; preds = %entry +; CHECK: jmp _f1 + %call = tail call i32 @f1() nounwind + br label %return + +sw.bb1: ; preds = %entry +; CHECK: jmp _f2 + %call2 = tail call i32 @f2() nounwind + br label %return + +sw.bb3: ; preds = %entry +; CHECK: jmp _f3 + %call4 = tail call i32 @f3() nounwind + br label %return + +sw.bb5: ; preds = %entry +; CHECK: jmp _f4 + %call6 = tail call i32 @f4() nounwind + br label %return + +sw.bb7: ; preds = %entry +; CHECK: jmp _f5 + %call8 = tail call i32 @f5() nounwind + br label %return + +sw.bb9: ; preds = %entry +; CHECK: jmp _f6 + %call10 = tail call i32 @f6() nounwind + br label %return + +return: ; preds = %entry, %sw.bb9, %sw.bb7, %sw.bb5, %sw.bb3, %sw.bb1, %sw.bb + %retval.0 = phi i32 [ %call10, %sw.bb9 ], [ %call8, %sw.bb7 ], [ %call6, %sw.bb5 ], [ %call4, %sw.bb3 ], [ %call2, %sw.bb1 ], [ %call, %sw.bb ], [ 0, %entry ] + ret i32 %retval.0 +} + +declare i32 @f1() + +declare i32 @f2() + +declare i32 @f3() + +declare i32 @f4() + +declare i32 @f5() + +declare i32 @f6() + +; rdar://11958338 +%0 = type opaque + +declare i8* @bar(i8*) uwtable optsize noinline ssp + +define hidden %0* @thingWithValue(i8* %self) uwtable ssp { +entry: +; CHECK: thingWithValue: +; CHECK: jmp _bar + br i1 undef, label %if.then.i, label %if.else.i + +if.then.i: ; preds = %entry + br label %someThingWithValue.exit + +if.else.i: ; preds = %entry + %call4.i = tail call i8* @bar(i8* undef) optsize + br label %someThingWithValue.exit + +someThingWithValue.exit: ; preds = %if.else.i, %if.then.i + %retval.0.in.i = phi i8* [ undef, %if.then.i ], [ %call4.i, %if.else.i ] + %retval.0.i = bitcast i8* %retval.0.in.i to %0* + ret %0* %retval.0.i +}