diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index a6c3a5e3e94..9fc4164cc0e 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -1642,6 +1642,8 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI); if (GEPIt == E) return; + bool NonConstant = false; + unsigned NonConstantIdxSize = 0; // Walk through the GEP type indices, checking the types that this indexes // into. @@ -1651,15 +1653,30 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, continue; ConstantInt *IdxVal = dyn_cast(GEPIt.getOperand()); - if (!IdxVal) - return MarkUnsafe(Info, GEPI); + if (!IdxVal) { + // Non constant GEPs are only a problem on arrays, structs, and pointers. + // Vectors can be dynamically indexed. + // FIXME: Add support for dynamic indexing on arrays. This should be + // ok on any subarrays of the alloca array, e.g., a[0][i] is ok, but a[i][0] + // isn't. + if (!(*GEPIt)->isVectorTy()) + return MarkUnsafe(Info, GEPI); + NonConstant = true; + NonConstantIdxSize = TD->getTypeAllocSize(*GEPIt); + } } // Compute the offset due to this GEP and check if the alloca has a // component element at that offset. SmallVector Indices(GEPI->op_begin() + 1, GEPI->op_end()); + // If this GEP is non constant then the last operand must have been a + // dynamic index into a vector. Pop this now as it has no impact on the + // constant part of the offset. 
+ if (NonConstant) + Indices.pop_back(); Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices); - if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, 0)) + if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, + NonConstantIdxSize)) MarkUnsafe(Info, GEPI); } @@ -1961,6 +1978,13 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, SmallVector &NewElts) { uint64_t OldOffset = Offset; SmallVector Indices(GEPI->op_begin() + 1, GEPI->op_end()); + // If the GEP was dynamic then it must have been a dynamic vector lookup. + // In this case, it must be the last GEP operand which is dynamic so keep that + // aside until we've found the constant GEP offset then add it back in at the + // end. + Value* NonConstantIdx = 0; + if (!GEPI->hasAllConstantIndices()) + NonConstantIdx = Indices.pop_back_val(); Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices); RewriteForScalarRepl(GEPI, AI, Offset, NewElts); @@ -1987,6 +2011,8 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy); NewArgs.push_back(ConstantInt::get(IdxTy, EltIdx)); } + if (NonConstantIdx) + NewArgs.push_back(NonConstantIdx); Instruction *Val = NewElts[Idx]; if (NewArgs.size() > 1) { Val = GetElementPtrInst::CreateInBounds(Val, NewArgs, "", GEPI); diff --git a/test/Transforms/ScalarRepl/dynamic-vector-gep.ll b/test/Transforms/ScalarRepl/dynamic-vector-gep.ll new file mode 100644 index 00000000000..48a0da9dabd --- /dev/null +++ b/test/Transforms/ScalarRepl/dynamic-vector-gep.ll @@ -0,0 +1,83 @@ +; RUN: opt < %s -scalarrepl -S | FileCheck %s + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "x86_64-apple-darwin10.0.0" + +; CHECK: @test1 +; CHECK: %[[alloc0:[\.a-z0-9]*]] = alloca <4 x float> +; CHECK: %[[alloc1:[\.a-z0-9]*]] = alloca <4 x float> +; CHECK: 
store <4 x float> zeroinitializer, <4 x float>* %[[alloc0]] + +; Split the array but don't replace the memset with an insert +; element as it's not a constant offset. +define float @test1(i32 %idx1, i32 %idx2) { +entry: + %0 = alloca [4 x <4 x float>] + store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0 + %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1 + %cast = bitcast float* %ptr1 to i8* + call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 4, i32 4, i1 false) + %ptr2 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 1, i32 %idx2 + %ret = load float* %ptr2 + ret float %ret +} + +; CHECK: @test2 +; CHECK: %[[alloc:[\.a-z0-9]*]] = alloca <4 x float> +; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc]] +; CHECK: %ptr1 = getelementptr inbounds <4 x float>* %[[alloc]], i32 0, i32 %idx1 +; CHECK: store float 1.000000e+00, float* %ptr1 +; CHECK: %ptr2 = getelementptr inbounds <4 x float>* %[[alloc]], i32 0, i32 %idx2 +; CHECK: %ret = load float* %ptr2 +; CHECK: ret float %ret + +; Do SROA on the array when it has dynamic vector reads and writes. +define float @test2(i32 %idx1, i32 %idx2) { +entry: + %0 = alloca [4 x <4 x float>] + store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0 + %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1 + store float 1.0, float* %ptr1 + %ptr2 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx2 + %ret = load float* %ptr2 + ret float %ret +} + +; CHECK: test3 +; CHECK: %0 = alloca [4 x <4 x float>] +; CHECK-NOT: alloca + +; Don't do SROA on a dynamically indexed vector when it spans +; more than one array element of the alloca array it is within. 
+define float @test3(i32 %idx1, i32 %idx2) { +entry: + %0 = alloca [4 x <4 x float>] + store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0 + %bigvec = bitcast [4 x <4 x float>]* %0 to <16 x float>* + %ptr1 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx1 + store float 1.0, float* %ptr1 + %ptr2 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx2 + %ret = load float* %ptr2 + ret float %ret +} + +; CHECK: @test4 +; CHECK: %0 = alloca [4 x <4 x float>] +; CHECK-NOT: alloca + +; Don't do SROA as there is a second dynamically indexed array +; which may span multiple elements of the alloca. +define float @test4(i32 %idx1, i32 %idx2) { +entry: + %0 = alloca [4 x <4 x float>] + store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0 + %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1 + %ptr2 = bitcast float* %ptr1 to [1 x <2 x float>]* + %ptr3 = getelementptr [1 x <2 x float>]* %ptr2, i32 0, i32 0, i32 %idx1 + store float 1.0, float* %ptr1 + %ptr4 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx2 + %ret = load float* %ptr4 + ret float %ret +} + +declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1)