1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00

[AMDGPU] Bail alloca vectorization if GEP not found

Differential Revision: https://reviews.llvm.org/D80587
This commit is contained in:
Stanislav Mekhanoshin 2020-05-26 13:30:19 -07:00
parent 9e6372bda0
commit d18295dea3
2 changed files with 28 additions and 3 deletions

View File

@ -339,7 +339,9 @@ static Value *stripBitcasts(Value *V) {
static Value *
calculateVectorIndex(Value *Ptr,
const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
GetElementPtrInst *GEP = cast<GetElementPtrInst>(stripBitcasts(Ptr));
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(stripBitcasts(Ptr));
if (!GEP)
return nullptr;
auto I = GEPIdx.find(GEP);
return I == GEPIdx.end() ? nullptr : I->second;
@ -496,10 +498,12 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL) {
if (Inst->getType() == AllocaTy || Inst->getType()->isVectorTy())
break;
Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
Value *Ptr = cast<LoadInst>(Inst)->getPointerOperand();
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
if (!Index)
break;
Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
Value *VecValue = Builder.CreateLoad(VectorTy, BitCast);
Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index);
@ -515,9 +519,12 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL) {
SI->getValueOperand()->getType()->isVectorTy())
break;
Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
Value *Ptr = SI->getPointerOperand();
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
if (!Index)
break;
Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
Value *VecValue = Builder.CreateLoad(VectorTy, BitCast);
Value *Elt = SI->getValueOperand();

View File

@ -189,5 +189,23 @@ entry:
ret void
}
; GCN-LABEL: {{^}}ptr_alloca_bitcast:
; OPT-LABEL: define i64 @ptr_alloca_bitcast
; GCN-NOT: buffer_
; GCN: v_mov_b32_e32 v1, 0
; OPT: %private_iptr = alloca <2 x i32>, align 8, addrspace(5)
; OPT: %cast = bitcast <2 x i32> addrspace(5)* %private_iptr to i64 addrspace(5)*
; OPT: %tmp1 = load i64, i64 addrspace(5)* %cast, align 8
; Test input: the i64 load below reads the <2 x i32> addrspace(5) alloca
; through a plain bitcast, with no getelementptr between the alloca and the
; load. The OPT check lines for this function expect the alloca, the bitcast
; and the load to still be present in the checked IR.
define i64 @ptr_alloca_bitcast() {
entry:
%private_iptr = alloca <2 x i32>, align 8, addrspace(5)
; View the <2 x i32> slot as a single i64 so it can be loaded in one access.
%cast = bitcast <2 x i32> addrspace(5)* %private_iptr to i64 addrspace(5)*
%tmp1 = load i64, i64 addrspace(5)* %cast, align 8
ret i64 %tmp1
}
declare i32 @llvm.amdgcn.workitem.id.x()
declare i32 @llvm.amdgcn.workitem.id.y()