From f05ebf08492f98a4d2eee9db3d9168d68f0ea74b Mon Sep 17 00:00:00 2001
From: Chris Lattner <sabre@nondot.org>
Date: Sun, 8 Mar 2009 04:17:04 +0000
Subject: [PATCH] teach SROA to promote vector allocas that are written by a
 memset to a vector type instead of to an integer type.

llvm-svn: 66368
---
 .../Scalar/ScalarReplAggregates.cpp           | 36 ++++++++++---------
 test/Transforms/ScalarRepl/vector_memcpy.ll   | 18 ++++++++--
 2 files changed, 35 insertions(+), 19 deletions(-)
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 98d5a027013..78730b6eb72 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -1350,8 +1350,6 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
       // Store of constant value and constant size.
       if (isa<ConstantInt>(MSI->getValue()) &&
           isa<ConstantInt>(MSI->getLength())) {
-        // FIXME (!): Why reset VecTy?
-        VecTy = Type::VoidTy;
         IsNotTrivial = true;
         continue;
       }
@@ -1628,21 +1626,25 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old,
   const Type *AllocaType = Old->getType();
 
   if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
-    // If the result alloca is a vector type, this is either an element
-    // access or a bitcast to another vector type.
-    if (isa<VectorType>(SV->getType())) {
-      SV = Builder.CreateBitCast(SV, AllocaType, "tmp");
-    } else {
-      // Must be an element insertion.
-      unsigned Elt = Offset/TD->getTypePaddedSizeInBits(VTy->getElementType());
-      
-      if (SV->getType() != VTy->getElementType())
-        SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp");
-      
-      SV = Builder.CreateInsertElement(Old, SV, 
-                                       ConstantInt::get(Type::Int32Ty, Elt), 
-                                       "tmp");
-    }
+    uint64_t VecSize = TD->getTypePaddedSizeInBits(VTy);
+    uint64_t ValSize = TD->getTypePaddedSizeInBits(SV->getType());
+    
+    // Is the whole vector being overwritten, either by a memset or by an
+    // access of a different vector type of the same size?
+    if (ValSize == VecSize)
+      return Builder.CreateBitCast(SV, AllocaType, "tmp");
+
+    uint64_t EltSize = TD->getTypePaddedSizeInBits(VTy->getElementType());
+
+    // Must be an element insertion.
+    unsigned Elt = Offset/EltSize;
+    
+    if (SV->getType() != VTy->getElementType())
+      SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp");
+    
+    SV = Builder.CreateInsertElement(Old, SV, 
+                                     ConstantInt::get(Type::Int32Ty, Elt),
+                                     "tmp");
     return SV;
   }
   
diff --git a/test/Transforms/ScalarRepl/vector_memcpy.ll b/test/Transforms/ScalarRepl/vector_memcpy.ll
index dc947b0d715..56785788ff9 100644
--- a/test/Transforms/ScalarRepl/vector_memcpy.ll
+++ b/test/Transforms/ScalarRepl/vector_memcpy.ll
@@ -1,4 +1,7 @@
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | grep {ret <16 x float> %A}
+; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis > %t
+; RUN: grep {ret <16 x float> %A} %t
+; RUN: grep {ret <16 x float> zeroinitializer} %t
+
 define <16 x float> @foo(<16 x float> %A) nounwind {
 	%tmp = alloca <16 x float>, align 16
 	%tmp2 = alloca <16 x float>, align 16
@@ -11,5 +14,16 @@ define <16 x float> @foo(<16 x float> %A) nounwind {
 	ret <16 x float> %R
 }
 
-declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
+define <16 x float> @foo2(<16 x float> %A) nounwind {
+	%tmp2 = alloca <16 x float>, align 16
 
+	%s2 = bitcast <16 x float>* %tmp2 to i8*
+	call void @llvm.memset.i64(i8* %s2, i8 0, i64 64, i32 16)
+	
+	%R = load <16 x float>* %tmp2
+	ret <16 x float> %R
+}
+
+
+declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
+declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind