[CGP] Convert phi types

If a collection of interconnected phi nodes is only ever loaded, stored or bitcast then we can convert the whole set to the bitcast type, potentially helping to reduce the number of register moves needed as the phis are passed across basic block boundaries. This has to be done in CodeGenPrepare as it naturally straddles basic blocks. The algorithm just starts from phi nodes, looking at uses and operands for a collection of nodes that all together are bitcast between float and integer types. We record visited phi nodes to not have to process them more than once. The whole subgraph is then replaced with a new type. Loads and Stores are bitcast to the correct type, which should then be folded into the load/store, changing its type. This comes up in the biquad testcase due to the way MVE needs to keep values in integer registers. I have also seen it come up from aarch64 partner example code, where a complicated set of sroa/inlining produced integer phis, where float would have been a better choice. I also added undef and extract element handling which increased the potency in some cases. This adds it with an option that defaults to off, and it is disabled for 32bit X86 due to potential issues around canonicalizing NaNs. Differential Revision: https://reviews.llvm.org/D81827
2025-01-31 12:41:49 +01:00 · 2020-06-21 11:28:31 +01:00 · 2020-06-21 11:28:31 +01:00 · 0987ead525
commit 0987ead525
parent 9b2563a43f
6 changed files with 263 additions and 41 deletions
--- a/include/llvm/CodeGen/TargetLowering.h
+++ b/include/llvm/CodeGen/TargetLowering.h
@ -2368,6 +2368,14 @@ public:
    return nullptr;
  }

+  /// Given a set of interconnected phis of type 'From' that are loaded/stored
+  /// or bitcast to type 'To', return true if the set should be converted to
+  /// 'To'. The default accepts any integer or floating point type pair.
+  virtual bool shouldConvertPhiType(Type *From, Type *To) const {
+    return (From->isIntegerTy() || From->isFloatingPointTy()) &&
+           (To->isIntegerTy() || To->isFloatingPointTy());
+  }
+
  /// Returns true if the opcode is a commutative binary operation.
  virtual bool isCommutativeBinOp(unsigned Opcode) const {
    // FIXME: This should get its info from the td file.
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@ -245,6 +245,10 @@ static cl::opt<bool>
                     cl::desc("Enable BFI update verification for "
                              "CodeGenPrepare."));

+static cl::opt<bool> OptimizePhiTypes(
+    "cgp-optimize-phi-types", cl::Hidden, cl::init(false), // Off by default.
+    cl::desc("Enable converting phi types in CodeGenPrepare"));
+
 namespace {

 enum ExtType {
@ -407,6 +411,9 @@ class TypePromotionTransaction;
                          unsigned CreatedInstsCost = 0);
    bool mergeSExts(Function &F);
    bool splitLargeGEPOffsets();
+    bool optimizePhiType(PHINode *Inst, SmallPtrSetImpl<PHINode *> &Visited,
+                         SmallPtrSetImpl<Instruction *> &DeletedInstrs);
+    bool optimizePhiTypes(Function &F);
    bool performAddressTypePromotion(
        Instruction *&Inst,
        bool AllowPromotionWithoutCommonHeader,
@ -515,6 +522,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
      MadeChange |= mergeSExts(F);
    if (!LargeOffsetGEPMap.empty())
      MadeChange |= splitLargeGEPOffsets();
+    MadeChange |= optimizePhiTypes(F);

    if (MadeChange)
      eliminateFallThrough(F);
@ -5717,6 +5725,155 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
  return Changed;
 }

+bool CodeGenPrepare::optimizePhiType(
+    PHINode *I, SmallPtrSetImpl<PHINode *> &Visited,
+    SmallPtrSetImpl<Instruction *> &DeletedInstrs) {
+  // We are looking for a collection of interconnected phi nodes that together
+  // only use loads/extractelements/bitcasts/undef and are only used by
+  // stores/bitcasts, where all the bitcasts agree on one other type. Convert
+  // the whole set of nodes to that type.
+  Type *PhiTy = I->getType();
+  Type *ConvertTy = nullptr;
+  if (Visited.count(I) ||
+      (!I->getType()->isIntegerTy() && !I->getType()->isFloatingPointTy()))
+    return false;
+
+  SmallVector<Instruction *, 4> Worklist;
+  Worklist.push_back(cast<Instruction>(I));
+  SmallPtrSet<PHINode *, 4> PhiNodes;
+  PhiNodes.insert(I);
+  Visited.insert(I);
+  SmallPtrSet<Instruction *, 4> Defs;
+  SmallPtrSet<Instruction *, 4> Uses;
+
+  while (!Worklist.empty()) {
+    Instruction *II = Worklist.pop_back_val();
+
+    if (auto *Phi = dyn_cast<PHINode>(II)) {
+      // Handle defs (incoming values), which might also be phis.
+      for (Value *V : Phi->incoming_values()) {
+        if (auto *OpPhi = dyn_cast<PHINode>(V)) {
+          if (!PhiNodes.count(OpPhi)) {
+            if (Visited.count(OpPhi))
+              return false; // Already claimed by an earlier walk.
+            PhiNodes.insert(OpPhi);
+            Visited.insert(OpPhi);
+            Worklist.push_back(OpPhi);
+          }
+        } else if (auto *OpLoad = dyn_cast<LoadInst>(V)) {
+          if (!Defs.count(OpLoad)) {
+            Defs.insert(OpLoad);
+            Worklist.push_back(OpLoad);
+          }
+        } else if (auto *OpEx = dyn_cast<ExtractElementInst>(V)) {
+          if (!Defs.count(OpEx)) {
+            Defs.insert(OpEx);
+            Worklist.push_back(OpEx);
+          }
+        } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
+          if (!ConvertTy)
+            ConvertTy = OpBC->getOperand(0)->getType();
+          if (OpBC->getOperand(0)->getType() != ConvertTy)
+            return false;
+          if (!Defs.count(OpBC)) {
+            Defs.insert(OpBC);
+            Worklist.push_back(OpBC);
+          }
+        } else if (!isa<UndefValue>(V))
+          return false;
+      }
+    }
+
+    // Handle uses, which might also be phis.
+    for (User *V : II->users()) {
+      if (auto *OpPhi = dyn_cast<PHINode>(V)) {
+        if (!PhiNodes.count(OpPhi)) {
+          if (Visited.count(OpPhi))
+            return false; // Already claimed by an earlier walk.
+          PhiNodes.insert(OpPhi);
+          Visited.insert(OpPhi);
+          Worklist.push_back(OpPhi);
+        }
+      } else if (auto *OpStore = dyn_cast<StoreInst>(V)) {
+        if (OpStore->getOperand(0) != II)
+          return false; // II must be the stored value.
+        Uses.insert(OpStore);
+      } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
+        if (!ConvertTy)
+          ConvertTy = OpBC->getType();
+        if (OpBC->getType() != ConvertTy)
+          return false;
+        Uses.insert(OpBC);
+      } else
+        return false;
+    }
+  }
+
+  if (!ConvertTy || !TLI->shouldConvertPhiType(PhiTy, ConvertTy))
+    return false;
+
+  LLVM_DEBUG(dbgs() << "Converting " << *I << "\n  and connected nodes to "
+                    << *ConvertTy << "\n");
+
+  // Create all the new phi nodes of the new type, and bitcast any loads or
+  // extractelements feeding them to that type.
+  ValueToValueMap ValMap;
+  ValMap[UndefValue::get(PhiTy)] = UndefValue::get(ConvertTy);
+  for (Instruction *D : Defs) {
+    if (isa<BitCastInst>(D))
+      ValMap[D] = D->getOperand(0); // Look through the def bitcast.
+    else
+      ValMap[D] =
+          new BitCastInst(D, ConvertTy, D->getName() + ".bc", D->getNextNode());
+  }
+  for (PHINode *Phi : PhiNodes)
+    ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(),
+                                  Phi->getName() + ".tc", Phi);
+  // Pipe together all the new phi nodes using the mapped incoming values.
+  for (PHINode *Phi : PhiNodes) {
+    PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
+    for (int i = 0, e = Phi->getNumIncomingValues(); i < e; i++)
+      NewPhi->addIncoming(ValMap[Phi->getIncomingValue(i)],
+                          Phi->getIncomingBlock(i));
+  }
+  // And finally pipe up the stores and bitcasts.
+  for (Instruction *U : Uses) {
+    if (isa<BitCastInst>(U)) {
+      DeletedInstrs.insert(U); // Erased later by the caller.
+      U->replaceAllUsesWith(ValMap[U->getOperand(0)]);
+    } else
+      U->setOperand(0,
+                    new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc", U));
+  }
+
+  // Save the removed phis to be deleted later.
+  for (PHINode *Phi : PhiNodes)
+    DeletedInstrs.insert(Phi);
+  return true;
+}
+
+bool CodeGenPrepare::optimizePhiTypes(Function &F) {
+  if (!OptimizePhiTypes)
+    return false; // Gated by -cgp-optimize-phi-types.
+
+  bool Changed = false;
+  SmallPtrSet<PHINode *, 4> Visited;
+  SmallPtrSet<Instruction *, 4> DeletedInstrs;
+
+  // Attempt to optimize all the phis in the function to the correct type.
+  for (auto &BB : F)
+    for (auto &Phi : BB.phis())
+      Changed |= optimizePhiType(&Phi, Visited, DeletedInstrs);
+
+  // Remove the old phis and bitcasts that have been replaced.
+  for (auto *I : DeletedInstrs) {
+    I->replaceAllUsesWith(UndefValue::get(I->getType()));
+    I->eraseFromParent();
+  }
+
+  return Changed;
+}
+
 /// Return true, if an ext(load) can be formed from an extension in
 /// \p MovedExts.
 bool CodeGenPrepare::canFormExtLd(
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@ -30748,6 +30748,12 @@ bool X86TargetLowering::shouldSinkOperands(Instruction *I,
  return false;
 }

+bool X86TargetLowering::shouldConvertPhiType(Type *From, Type *To) const {
+  if (!Subtarget.is64Bit())
+    return false; // Not on 32-bit X86 (NaN canonicalization concerns).
+  return TargetLowering::shouldConvertPhiType(From, To);
+}
+
 bool X86TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
  if (isa<MaskedLoadSDNode>(ExtVal.getOperand(0)))
    return false;
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@ -1187,6 +1187,7 @@ namespace llvm {

    bool shouldSinkOperands(Instruction *I,
                            SmallVectorImpl<Use *> &Ops) const override;
+    bool shouldConvertPhiType(Type *From, Type *To) const override;

    /// Return true if folding a vector load into ExtVal (a sign, zero, or any
    /// extend node) is profitable.
--- a/test/CodeGen/AArch64/convertphitype.ll
+++ b/test/CodeGen/AArch64/convertphitype.ll
@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -codegenprepare %s -S | FileCheck %s
+; RUN: opt -codegenprepare -cgp-optimize-phi-types %s -S | FileCheck %s

 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64--linux-gnu"
@ -11,14 +11,15 @@ define float @convphi1(i32 *%s, i32 *%d, i32 %n) {
 ; CHECK-NEXT:    br i1 [[CMP15]], label [[THEN:%.*]], label [[ELSE:%.*]]
 ; CHECK:       then:
 ; CHECK-NEXT:    [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4
+; CHECK-NEXT:    [[LS_BC:%.*]] = bitcast i32 [[LS]] to float
 ; CHECK-NEXT:    br label [[END:%.*]]
 ; CHECK:       else:
 ; CHECK-NEXT:    [[LD:%.*]] = load i32, i32* [[D:%.*]], align 4
+; CHECK-NEXT:    [[LD_BC:%.*]] = bitcast i32 [[LD]] to float
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ [[LS]], [[THEN]] ], [ [[LD]], [[ELSE]] ]
-; CHECK-NEXT:    [[B:%.*]] = bitcast i32 [[PHI]] to float
-; CHECK-NEXT:    ret float [[B]]
+; CHECK-NEXT:    [[PHI_TC:%.*]] = phi float [ [[LS_BC]], [[THEN]] ], [ [[LD_BC]], [[ELSE]] ]
+; CHECK-NEXT:    ret float [[PHI_TC]]
 ;
 entry:
  %cmp15 = icmp sgt i32 %n, 0
@ -45,11 +46,11 @@ define float @convphi2(i32 *%s, i32 *%d, i32 %n) {
 ; CHECK-NEXT:    br i1 [[CMP15]], label [[THEN:%.*]], label [[END:%.*]]
 ; CHECK:       then:
 ; CHECK-NEXT:    [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4
+; CHECK-NEXT:    [[LS_BC:%.*]] = bitcast i32 [[LS]] to float
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ [[LS]], [[THEN]] ], [ undef, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[B:%.*]] = bitcast i32 [[PHI]] to float
-; CHECK-NEXT:    ret float [[B]]
+; CHECK-NEXT:    [[PHI_TC:%.*]] = phi float [ [[LS_BC]], [[THEN]] ], [ undef, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret float [[PHI_TC]]
 ;
 entry:
  %cmp15 = icmp sgt i32 %n, 0
@ -73,11 +74,11 @@ define float @convphi3(i32 *%s, i32 *%d, i32 %n, float %f) {
 ; CHECK-NEXT:    br i1 [[CMP15]], label [[THEN:%.*]], label [[END:%.*]]
 ; CHECK:       then:
 ; CHECK-NEXT:    [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4
+; CHECK-NEXT:    [[LS_BC:%.*]] = bitcast i32 [[LS]] to float
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ [[LS]], [[THEN]] ], [ [[FB]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[B:%.*]] = bitcast i32 [[PHI]] to float
-; CHECK-NEXT:    ret float [[B]]
+; CHECK-NEXT:    [[PHI_TC:%.*]] = phi float [ [[LS_BC]], [[THEN]] ], [ [[F]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret float [[PHI_TC]]
 ;
 entry:
  %cmp15 = icmp sgt i32 %n, 0
@ -102,10 +103,12 @@ define void @convphi4(i32 *%s, i32 *%d, i32 %n, float %f) {
 ; CHECK-NEXT:    br i1 [[CMP15]], label [[THEN:%.*]], label [[END:%.*]]
 ; CHECK:       then:
 ; CHECK-NEXT:    [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4
+; CHECK-NEXT:    [[LS_BC:%.*]] = bitcast i32 [[LS]] to float
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ [[LS]], [[THEN]] ], [ [[FB]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    store i32 [[PHI]], i32* [[D:%.*]], align 4
+; CHECK-NEXT:    [[PHI_TC:%.*]] = phi float [ [[LS_BC]], [[THEN]] ], [ [[F]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[BC:%.*]] = bitcast float [[PHI_TC]] to i32
+; CHECK-NEXT:    store i32 [[BC]], i32* [[D:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
@ -130,14 +133,15 @@ define i64 @convphi_d2i(double *%s, double *%d, i32 %n) {
 ; CHECK-NEXT:    br i1 [[CMP15]], label [[THEN:%.*]], label [[ELSE:%.*]]
 ; CHECK:       then:
 ; CHECK-NEXT:    [[LS:%.*]] = load double, double* [[S:%.*]], align 4
+; CHECK-NEXT:    [[LS_BC:%.*]] = bitcast double [[LS]] to i64
 ; CHECK-NEXT:    br label [[END:%.*]]
 ; CHECK:       else:
 ; CHECK-NEXT:    [[LD:%.*]] = load double, double* [[D:%.*]], align 4
+; CHECK-NEXT:    [[LD_BC:%.*]] = bitcast double [[LD]] to i64
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[PHI:%.*]] = phi double [ [[LS]], [[THEN]] ], [ [[LD]], [[ELSE]] ]
-; CHECK-NEXT:    [[B:%.*]] = bitcast double [[PHI]] to i64
-; CHECK-NEXT:    ret i64 [[B]]
+; CHECK-NEXT:    [[PHI_TC:%.*]] = phi i64 [ [[LS_BC]], [[THEN]] ], [ [[LD_BC]], [[ELSE]] ]
+; CHECK-NEXT:    ret i64 [[PHI_TC]]
 ;
 entry:
  %cmp15 = icmp sgt i32 %n, 0
@ -164,14 +168,15 @@ define i32 @convphi_f2i(float *%s, float *%d, i32 %n) {
 ; CHECK-NEXT:    br i1 [[CMP15]], label [[THEN:%.*]], label [[ELSE:%.*]]
 ; CHECK:       then:
 ; CHECK-NEXT:    [[LS:%.*]] = load float, float* [[S:%.*]], align 4
+; CHECK-NEXT:    [[LS_BC:%.*]] = bitcast float [[LS]] to i32
 ; CHECK-NEXT:    br label [[END:%.*]]
 ; CHECK:       else:
 ; CHECK-NEXT:    [[LD:%.*]] = load float, float* [[D:%.*]], align 4
+; CHECK-NEXT:    [[LD_BC:%.*]] = bitcast float [[LD]] to i32
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[PHI:%.*]] = phi float [ [[LS]], [[THEN]] ], [ [[LD]], [[ELSE]] ]
-; CHECK-NEXT:    [[B:%.*]] = bitcast float [[PHI]] to i32
-; CHECK-NEXT:    ret i32 [[B]]
+; CHECK-NEXT:    [[PHI_TC:%.*]] = phi i32 [ [[LS_BC]], [[THEN]] ], [ [[LD_BC]], [[ELSE]] ]
+; CHECK-NEXT:    ret i32 [[PHI_TC]]
 ;
 entry:
  %cmp15 = icmp sgt i32 %n, 0
@ -198,14 +203,15 @@ define i16 @convphi_h2i(half *%s, half *%d, i32 %n) {
 ; CHECK-NEXT:    br i1 [[CMP15]], label [[THEN:%.*]], label [[ELSE:%.*]]
 ; CHECK:       then:
 ; CHECK-NEXT:    [[LS:%.*]] = load half, half* [[S:%.*]], align 4
+; CHECK-NEXT:    [[LS_BC:%.*]] = bitcast half [[LS]] to i16
 ; CHECK-NEXT:    br label [[END:%.*]]
 ; CHECK:       else:
 ; CHECK-NEXT:    [[LD:%.*]] = load half, half* [[D:%.*]], align 4
+; CHECK-NEXT:    [[LD_BC:%.*]] = bitcast half [[LD]] to i16
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[PHI:%.*]] = phi half [ [[LS]], [[THEN]] ], [ [[LD]], [[ELSE]] ]
-; CHECK-NEXT:    [[B:%.*]] = bitcast half [[PHI]] to i16
-; CHECK-NEXT:    ret i16 [[B]]
+; CHECK-NEXT:    [[PHI_TC:%.*]] = phi i16 [ [[LS_BC]], [[THEN]] ], [ [[LD_BC]], [[ELSE]] ]
+; CHECK-NEXT:    ret i16 [[PHI_TC]]
 ;
 entry:
  %cmp15 = icmp sgt i32 %n, 0
@ -232,14 +238,15 @@ define i128 @convphi_ld2i(fp128 *%s, fp128 *%d, i32 %n) {
 ; CHECK-NEXT:    br i1 [[CMP15]], label [[THEN:%.*]], label [[ELSE:%.*]]
 ; CHECK:       then:
 ; CHECK-NEXT:    [[LS:%.*]] = load fp128, fp128* [[S:%.*]], align 4
+; CHECK-NEXT:    [[LS_BC:%.*]] = bitcast fp128 [[LS]] to i128
 ; CHECK-NEXT:    br label [[END:%.*]]
 ; CHECK:       else:
 ; CHECK-NEXT:    [[LD:%.*]] = load fp128, fp128* [[D:%.*]], align 4
+; CHECK-NEXT:    [[LD_BC:%.*]] = bitcast fp128 [[LD]] to i128
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[PHI:%.*]] = phi fp128 [ [[LS]], [[THEN]] ], [ [[LD]], [[ELSE]] ]
-; CHECK-NEXT:    [[B:%.*]] = bitcast fp128 [[PHI]] to i128
-; CHECK-NEXT:    ret i128 [[B]]
+; CHECK-NEXT:    [[PHI_TC:%.*]] = phi i128 [ [[LS_BC]], [[THEN]] ], [ [[LD_BC]], [[ELSE]] ]
+; CHECK-NEXT:    ret i128 [[PHI_TC]]
 ;
 entry:
  %cmp15 = icmp sgt i32 %n, 0
@ -298,18 +305,19 @@ define float @convphi_loop(i32 *%s, i32 *%d, i64 %n) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP15:%.*]] = icmp sgt i64 [[N:%.*]], 0
 ; CHECK-NEXT:    [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4
+; CHECK-NEXT:    [[LS_BC:%.*]] = bitcast i32 [[LS]] to float
 ; CHECK-NEXT:    br i1 [[CMP15]], label [[LOOP:%.*]], label [[END:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[LPHI:%.*]] = phi i32 [ [[LS]], [[ENTRY]] ], [ [[LD:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[LD]] = load i32, i32* [[D:%.*]], align 4
+; CHECK-NEXT:    [[LPHI_TC:%.*]] = phi float [ [[LS_BC]], [[ENTRY]] ], [ [[LD_BC:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[LD:%.*]] = load i32, i32* [[D:%.*]], align 4
+; CHECK-NEXT:    [[LD_BC]] = bitcast i32 [[LD]] to float
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END]], label [[LOOP]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ undef, [[ENTRY]] ], [ [[LPHI]], [[LOOP]] ]
-; CHECK-NEXT:    [[B:%.*]] = bitcast i32 [[PHI]] to float
-; CHECK-NEXT:    ret float [[B]]
+; CHECK-NEXT:    [[PHI_TC:%.*]] = phi float [ undef, [[ENTRY]] ], [ [[LPHI_TC]], [[LOOP]] ]
+; CHECK-NEXT:    ret float [[PHI_TC]]
 ;
 entry:
  %cmp15 = icmp sgt i64 %n, 0
@ -370,19 +378,20 @@ define float @convphi_loopdelayed2(i32 *%s, i32 *%d, i64 %n) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP15:%.*]] = icmp sgt i64 [[N:%.*]], 0
 ; CHECK-NEXT:    [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4
+; CHECK-NEXT:    [[LS_BC:%.*]] = bitcast i32 [[LS]] to float
 ; CHECK-NEXT:    br i1 [[CMP15]], label [[LOOP:%.*]], label [[END:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[LPHI:%.*]] = phi i32 [ [[LS]], [[ENTRY]] ], [ [[LD:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[LPHI2:%.*]] = phi i32 [ undef, [[ENTRY]] ], [ [[LPHI]], [[LOOP]] ]
-; CHECK-NEXT:    [[LD]] = load i32, i32* [[D:%.*]], align 4
+; CHECK-NEXT:    [[LPHI_TC:%.*]] = phi float [ [[LS_BC]], [[ENTRY]] ], [ [[LD_BC:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[LPHI2_TC:%.*]] = phi float [ undef, [[ENTRY]] ], [ [[LPHI_TC]], [[LOOP]] ]
+; CHECK-NEXT:    [[LD:%.*]] = load i32, i32* [[D:%.*]], align 4
+; CHECK-NEXT:    [[LD_BC]] = bitcast i32 [[LD]] to float
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END]], label [[LOOP]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ undef, [[ENTRY]] ], [ [[LPHI2]], [[LOOP]] ]
-; CHECK-NEXT:    [[B:%.*]] = bitcast i32 [[PHI]] to float
-; CHECK-NEXT:    ret float [[B]]
+; CHECK-NEXT:    [[PHI_TC:%.*]] = phi float [ undef, [[ENTRY]] ], [ [[LPHI2_TC]], [[LOOP]] ]
+; CHECK-NEXT:    ret float [[PHI_TC]]
 ;
 entry:
  %cmp15 = icmp sgt i64 %n, 0
@ -409,31 +418,33 @@ define float @convphi_loopmore(i32 *%s, i32 *%d, i64 %n) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[N:%.*]], 1
 ; CHECK-NEXT:    [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4
+; CHECK-NEXT:    [[LS_BC:%.*]] = bitcast i32 [[LS]] to float
 ; CHECK-NEXT:    br i1 [[CMP]], label [[THEN:%.*]], label [[IFEND:%.*]]
 ; CHECK:       then:
 ; CHECK-NEXT:    [[LD:%.*]] = load i32, i32* [[D:%.*]], align 4
+; CHECK-NEXT:    [[LD_BC:%.*]] = bitcast i32 [[LD]] to float
 ; CHECK-NEXT:    br label [[IFEND]]
 ; CHECK:       ifend:
-; CHECK-NEXT:    [[PHI1:%.*]] = phi i32 [ [[LD]], [[THEN]] ], [ [[LS]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[PHI1_TC:%.*]] = phi float [ [[LD_BC]], [[THEN]] ], [ [[LS_BC]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[CMP15:%.*]] = icmp sgt i64 [[N]], 0
 ; CHECK-NEXT:    br i1 [[CMP15]], label [[LOOP:%.*]], label [[END:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[IFEND]] ], [ [[IV_NEXT:%.*]], [[LOOPEND:%.*]] ]
-; CHECK-NEXT:    [[PHI2:%.*]] = phi i32 [ [[PHI1]], [[IFEND]] ], [ [[PHI3:%.*]], [[LOOPEND]] ]
+; CHECK-NEXT:    [[PHI2_TC:%.*]] = phi float [ [[PHI1_TC]], [[IFEND]] ], [ [[PHI3_TC:%.*]], [[LOOPEND]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i64 [[N]], 1
 ; CHECK-NEXT:    br i1 [[TMP0]], label [[LOOPTHEN:%.*]], label [[LOOPEND]]
 ; CHECK:       loopthen:
 ; CHECK-NEXT:    [[LL:%.*]] = load i32, i32* [[D]], align 4
+; CHECK-NEXT:    [[LL_BC:%.*]] = bitcast i32 [[LL]] to float
 ; CHECK-NEXT:    br label [[LOOPEND]]
 ; CHECK:       loopend:
-; CHECK-NEXT:    [[PHI3]] = phi i32 [ [[LL]], [[LOOPTHEN]] ], [ [[PHI2]], [[LOOP]] ]
+; CHECK-NEXT:    [[PHI3_TC]] = phi float [ [[LL_BC]], [[LOOPTHEN]] ], [ [[PHI2_TC]], [[LOOP]] ]
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END]], label [[LOOP]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ [[PHI1]], [[IFEND]] ], [ [[PHI3]], [[LOOPEND]] ]
-; CHECK-NEXT:    [[B:%.*]] = bitcast i32 [[PHI]] to float
-; CHECK-NEXT:    ret float [[B]]
+; CHECK-NEXT:    [[PHI_TC:%.*]] = phi float [ [[PHI1_TC]], [[IFEND]] ], [ [[PHI3_TC]], [[LOOPEND]] ]
+; CHECK-NEXT:    ret float [[PHI_TC]]
 ;
 entry:
  %cmp = icmp eq i64 %n, 1
--- a/test/CodeGen/X86/convertphitype.ll
+++ b/test/CodeGen/X86/convertphitype.ll
@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -codegenprepare -cgp-optimize-phi-types=true %s -S | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+target triple = "i386-unknown-linux-gnu"
+
+define float @convphi1(i32 *%s, i32 *%d, i32 %n) {
+; CHECK-LABEL: @convphi1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP15:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP15]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4
+; CHECK-NEXT:    br label [[END:%.*]]
+; CHECK:       else:
+; CHECK-NEXT:    [[LD:%.*]] = load i32, i32* [[D:%.*]], align 4
+; CHECK-NEXT:    br label [[END]]
+; CHECK:       end:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ [[LS]], [[THEN]] ], [ [[LD]], [[ELSE]] ]
+; CHECK-NEXT:    [[B:%.*]] = bitcast i32 [[PHI]] to float
+; CHECK-NEXT:    ret float [[B]]
+;
+entry:
+  %cmp15 = icmp sgt i32 %n, 0
+  br i1 %cmp15, label %then, label %else
+
+then:
+  %ls = load i32, i32* %s, align 4
+  br label %end
+
+else:
+  %ld = load i32, i32* %d, align 4
+  br label %end
+
+end:
+  %phi = phi i32 [ %ls, %then ], [ %ld, %else ]
+  %b = bitcast i32 %phi to float
+  ret float %b
+}