From f05ebf08492f98a4d2eee9db3d9168d68f0ea74b Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Sun, 8 Mar 2009 04:17:04 +0000 Subject: [PATCH] teach SROA to handle promoting vector allocas with a memset into them into a vector type instead of into an integer type. llvm-svn: 66368 --- .../Scalar/ScalarReplAggregates.cpp | 36 ++++++++++--------- test/Transforms/ScalarRepl/vector_memcpy.ll | 18 ++++++++-- 2 files changed, 35 insertions(+), 19 deletions(-) diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 98d5a027013..78730b6eb72 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -1350,8 +1350,6 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, // Store of constant value and constant size. if (isa<ConstantInt>(MSI->getValue()) && isa<ConstantInt>(MSI->getLength())) { - // FIXME (!): Why reset VecTy? - VecTy = Type::VoidTy; IsNotTrivial = true; continue; } @@ -1628,21 +1626,25 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old, const Type *AllocaType = Old->getType(); if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) { - // If the result alloca is a vector type, this is either an element - // access or a bitcast to another vector type. - if (isa<VectorType>(SV->getType())) { - SV = Builder.CreateBitCast(SV, AllocaType, "tmp"); - } else { - // Must be an element insertion. - unsigned Elt = Offset/TD->getTypePaddedSizeInBits(VTy->getElementType()); - - if (SV->getType() != VTy->getElementType()) - SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp"); - - SV = Builder.CreateInsertElement(Old, SV, - ConstantInt::get(Type::Int32Ty, Elt), - "tmp"); - } + uint64_t VecSize = TD->getTypePaddedSizeInBits(VTy); + uint64_t ValSize = TD->getTypePaddedSizeInBits(SV->getType()); + + // Changing the whole vector with memset or with an access of a different + // vector type? 
+ if (ValSize == VecSize) + return Builder.CreateBitCast(SV, AllocaType, "tmp"); + + uint64_t EltSize = TD->getTypePaddedSizeInBits(VTy->getElementType()); + + // Must be an element insertion. + unsigned Elt = Offset/EltSize; + + if (SV->getType() != VTy->getElementType()) + SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp"); + + SV = Builder.CreateInsertElement(Old, SV, + ConstantInt::get(Type::Int32Ty, Elt), + "tmp"); return SV; } diff --git a/test/Transforms/ScalarRepl/vector_memcpy.ll b/test/Transforms/ScalarRepl/vector_memcpy.ll index dc947b0d715..56785788ff9 100644 --- a/test/Transforms/ScalarRepl/vector_memcpy.ll +++ b/test/Transforms/ScalarRepl/vector_memcpy.ll @@ -1,4 +1,7 @@ -; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | grep {ret <16 x float> %A} +; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis > %t +; RUN: grep {ret <16 x float> %A} %t +; RUN: grep {ret <16 x float> zeroinitializer} %t + define <16 x float> @foo(<16 x float> %A) nounwind { %tmp = alloca <16 x float>, align 16 %tmp2 = alloca <16 x float>, align 16 @@ -11,5 +14,16 @@ define <16 x float> @foo(<16 x float> %A) nounwind { ret <16 x float> %R } -declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind +define <16 x float> @foo2(<16 x float> %A) nounwind { + %tmp2 = alloca <16 x float>, align 16 + %s2 = bitcast <16 x float>* %tmp2 to i8* + call void @llvm.memset.i64(i8* %s2, i8 0, i64 64, i32 16) + + %R = load <16 x float>* %tmp2 + ret <16 x float> %R +} + + +declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind +declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind