mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
Allow SROA to split up an array of vectors into multiple vectors, even when the vectors are dynamically indexed
llvm-svn: 158529
This commit is contained in:
parent
9a1bc0fa81
commit
d64dbc9162
@ -1642,6 +1642,8 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI,
|
||||
gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI);
|
||||
if (GEPIt == E)
|
||||
return;
|
||||
bool NonConstant = false;
|
||||
unsigned NonConstantIdxSize = 0;
|
||||
|
||||
// Walk through the GEP type indices, checking the types that this indexes
|
||||
// into.
|
||||
@ -1651,15 +1653,30 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI,
|
||||
continue;
|
||||
|
||||
ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
|
||||
if (!IdxVal)
|
||||
if (!IdxVal) {
|
||||
// Non-constant GEPs are only a problem on arrays, structs, and pointers.
|
||||
// Vectors can be dynamically indexed.
|
||||
// FIXME: Add support for dynamic indexing on arrays. This should be
|
||||
// ok on any subarrays of the alloca array, e.g., a[0][i] is ok, but a[i][0]
|
||||
// isn't.
|
||||
if (!(*GEPIt)->isVectorTy())
|
||||
return MarkUnsafe(Info, GEPI);
|
||||
NonConstant = true;
|
||||
NonConstantIdxSize = TD->getTypeAllocSize(*GEPIt);
|
||||
}
|
||||
}
|
||||
|
||||
// Compute the offset due to this GEP and check if the alloca has a
|
||||
// component element at that offset.
|
||||
SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
|
||||
// If this GEP is non-constant then the last operand must have been a
|
||||
// dynamic index into a vector. Pop this now as it has no impact on the
|
||||
// constant part of the offset.
|
||||
if (NonConstant)
|
||||
Indices.pop_back();
|
||||
Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
|
||||
if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, 0))
|
||||
if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset,
|
||||
NonConstantIdxSize))
|
||||
MarkUnsafe(Info, GEPI);
|
||||
}
|
||||
|
||||
@ -1961,6 +1978,13 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
|
||||
SmallVector<AllocaInst*, 32> &NewElts) {
|
||||
uint64_t OldOffset = Offset;
|
||||
SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
|
||||
// If the GEP was dynamic then it must have been a dynamic vector lookup.
|
||||
// In this case, it must be the last GEP operand which is dynamic so keep that
|
||||
// aside until we've found the constant GEP offset then add it back in at the
|
||||
// end.
|
||||
Value* NonConstantIdx = 0;
|
||||
if (!GEPI->hasAllConstantIndices())
|
||||
NonConstantIdx = Indices.pop_back_val();
|
||||
Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
|
||||
|
||||
RewriteForScalarRepl(GEPI, AI, Offset, NewElts);
|
||||
@ -1987,6 +2011,8 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
|
||||
uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy);
|
||||
NewArgs.push_back(ConstantInt::get(IdxTy, EltIdx));
|
||||
}
|
||||
if (NonConstantIdx)
|
||||
NewArgs.push_back(NonConstantIdx);
|
||||
Instruction *Val = NewElts[Idx];
|
||||
if (NewArgs.size() > 1) {
|
||||
Val = GetElementPtrInst::CreateInBounds(Val, NewArgs, "", GEPI);
|
||||
|
83
test/Transforms/ScalarRepl/dynamic-vector-gep.ll
Normal file
83
test/Transforms/ScalarRepl/dynamic-vector-gep.ll
Normal file
@ -0,0 +1,83 @@
|
||||
; RUN: opt < %s -scalarrepl -S | FileCheck %s
|
||||
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
|
||||
target triple = "x86_64-apple-darwin10.0.0"
|
||||
|
||||
; CHECK: @test1
|
||||
; CHECK: %[[alloc0:[\.a-z0-9]*]] = alloca <4 x float>
|
||||
; CHECK: %[[alloc1:[\.a-z0-9]*]] = alloca <4 x float>
|
||||
; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc0]]
|
||||
|
||||
; Split the array but don't replace the memset with an insert
|
||||
; element as it's not a constant offset.
|
||||
define float @test1(i32 %idx1, i32 %idx2) {
|
||||
entry:
|
||||
%0 = alloca [4 x <4 x float>]
|
||||
store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
|
||||
%ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1
|
||||
%cast = bitcast float* %ptr1 to i8*
|
||||
call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 4, i32 4, i1 false)
|
||||
%ptr2 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 1, i32 %idx2
|
||||
%ret = load float* %ptr2
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
; CHECK: @test2
|
||||
; CHECK: %[[alloc:[\.a-z0-9]*]] = alloca <4 x float>
|
||||
; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc]]
|
||||
; CHECK: %ptr1 = getelementptr inbounds <4 x float>* %[[alloc]], i32 0, i32 %idx1
|
||||
; CHECK: store float 1.000000e+00, float* %ptr1
|
||||
; CHECK: %ptr2 = getelementptr inbounds <4 x float>* %[[alloc]], i32 0, i32 %idx2
|
||||
; CHECK: %ret = load float* %ptr2
|
||||
; CHECK: ret float %ret
|
||||
|
||||
; Do SROA on the array when it has dynamic vector reads and writes.
|
||||
define float @test2(i32 %idx1, i32 %idx2) {
|
||||
entry:
|
||||
%0 = alloca [4 x <4 x float>]
|
||||
store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
|
||||
%ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1
|
||||
store float 1.0, float* %ptr1
|
||||
%ptr2 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx2
|
||||
%ret = load float* %ptr2
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
; CHECK: test3
|
||||
; CHECK: %0 = alloca [4 x <4 x float>]
|
||||
; CHECK-NOT: alloca
|
||||
|
||||
; Don't do SROA on a dynamically indexed vector when it spans
|
||||
; more than one array element of the alloca array it is within.
|
||||
define float @test3(i32 %idx1, i32 %idx2) {
|
||||
entry:
|
||||
%0 = alloca [4 x <4 x float>]
|
||||
store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
|
||||
%bigvec = bitcast [4 x <4 x float>]* %0 to <16 x float>*
|
||||
%ptr1 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx1
|
||||
store float 1.0, float* %ptr1
|
||||
%ptr2 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx2
|
||||
%ret = load float* %ptr2
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
; CHECK: @test4
|
||||
; CHECK: %0 = alloca [4 x <4 x float>]
|
||||
; CHECK-NOT: alloca
|
||||
|
||||
; Don't do SROA as there is a second dynamically indexed array
|
||||
; which may span multiple elements of the alloca.
|
||||
define float @test4(i32 %idx1, i32 %idx2) {
|
||||
entry:
|
||||
%0 = alloca [4 x <4 x float>]
|
||||
store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
|
||||
%ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1
|
||||
%ptr2 = bitcast float* %ptr1 to [1 x <2 x float>]*
|
||||
%ptr3 = getelementptr [1 x <2 x float>]* %ptr2, i32 0, i32 0, i32 %idx1
|
||||
store float 1.0, float* %ptr1
|
||||
%ptr4 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx2
|
||||
%ret = load float* %ptr4
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1)
|
Loading…
Reference in New Issue
Block a user