mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 20:23:11 +01:00
[AMDGPU] Fix promote alloca which is already vector
Do not touch loads and stores that are already of vector type. Previously the pass was simply unable to see these loads and stores because they were hidden behind bitcasts. Differential Revision: https://reviews.llvm.org/D79738
This commit is contained in:
parent
1acbe6fc41
commit
4a88942e02
@ -468,7 +468,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL) {
|
||||
IRBuilder<> Builder(Inst);
|
||||
switch (Inst->getOpcode()) {
|
||||
case Instruction::Load: {
|
||||
if (Inst->getType() == AllocaTy)
|
||||
if (Inst->getType() == AllocaTy || Inst->getType()->isVectorTy())
|
||||
break;
|
||||
|
||||
Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
|
||||
@ -486,7 +486,8 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL) {
|
||||
}
|
||||
case Instruction::Store: {
|
||||
StoreInst *SI = cast<StoreInst>(Inst);
|
||||
if (SI->getValueOperand()->getType() == AllocaTy)
|
||||
if (SI->getValueOperand()->getType() == AllocaTy ||
|
||||
SI->getValueOperand()->getType()->isVectorTy())
|
||||
break;
|
||||
|
||||
Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
|
||||
|
@ -345,6 +345,110 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; OPT-LABEL: @bitcast_vector_to_vector(
|
||||
; OPT-NOT: alloca
|
||||
; OPT: store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> addrspace(1)* %out, align 16
|
||||
|
||||
; GCN-LABEL: {{^}}bitcast_vector_to_vector:
|
||||
; GCN: v_mov_b32_e32 v0, 1
|
||||
; GCN: v_mov_b32_e32 v1, 2
|
||||
; GCN: v_mov_b32_e32 v2, 3
|
||||
; GCN: v_mov_b32_e32 v3, 4
|
||||
|
||||
; GCN: ScratchSize: 0
|
||||
|
||||
define amdgpu_kernel void @bitcast_vector_to_vector(<4 x i32> addrspace(1)* %out) {
|
||||
.entry:
|
||||
%alloca = alloca <4 x float>, align 16, addrspace(5)
|
||||
%cast = bitcast <4 x float> addrspace(5)* %alloca to <4 x i32> addrspace(5)*
|
||||
store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> addrspace(5)* %cast
|
||||
%load = load <4 x i32>, <4 x i32> addrspace(5)* %cast, align 16
|
||||
store <4 x i32> %load, <4 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; OPT-LABEL: @vector_bitcast_from_alloca_array(
|
||||
; OPT-NOT: alloca
|
||||
; OPT: store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> addrspace(1)* %out, align 16
|
||||
|
||||
; GCN-LABEL: {{^}}vector_bitcast_from_alloca_array:
|
||||
; GCN: v_mov_b32_e32 v0, 1
|
||||
; GCN: v_mov_b32_e32 v1, 2
|
||||
; GCN: v_mov_b32_e32 v2, 3
|
||||
; GCN: v_mov_b32_e32 v3, 4
|
||||
|
||||
; GCN: ScratchSize: 0
|
||||
|
||||
define amdgpu_kernel void @vector_bitcast_from_alloca_array(<4 x i32> addrspace(1)* %out) {
|
||||
.entry:
|
||||
%alloca = alloca [4 x float], align 16, addrspace(5)
|
||||
%cast = bitcast [4 x float] addrspace(5)* %alloca to <4 x i32> addrspace(5)*
|
||||
store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> addrspace(5)* %cast
|
||||
%load = load <4 x i32>, <4 x i32> addrspace(5)* %cast, align 16
|
||||
store <4 x i32> %load, <4 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; OPT-LABEL: @vector_bitcast_to_array_from_alloca_array(
|
||||
; OPT-NOT: alloca
|
||||
; OPT: %out.repack = getelementptr inbounds [4 x i32], [4 x i32] addrspace(1)* %out, i64 0, i64 0
|
||||
; OPT-NEXT: store i32 1, i32 addrspace(1)* %out.repack, align 4
|
||||
; OPT-NEXT: %out.repack1 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(1)* %out, i64 0, i64 1
|
||||
; OPT-NEXT: store i32 2, i32 addrspace(1)* %out.repack1, align 4
|
||||
; OPT-NEXT: %out.repack2 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(1)* %out, i64 0, i64 2
|
||||
; OPT-NEXT: store i32 3, i32 addrspace(1)* %out.repack2, align 4
|
||||
; OPT-NEXT: %out.repack3 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(1)* %out, i64 0, i64 3
|
||||
; OPT-NEXT: store i32 4, i32 addrspace(1)* %out.repack3, align 4
|
||||
|
||||
; GCN-LABEL: {{^}}vector_bitcast_to_array_from_alloca_array:
|
||||
; GCN: v_mov_b32_e32 v0, 1
|
||||
; GCN: v_mov_b32_e32 v1, 2
|
||||
; GCN: v_mov_b32_e32 v2, 3
|
||||
; GCN: v_mov_b32_e32 v3, 4
|
||||
|
||||
; GCN: ScratchSize: 0
|
||||
|
||||
define amdgpu_kernel void @vector_bitcast_to_array_from_alloca_array([4 x i32] addrspace(1)* %out) {
|
||||
.entry:
|
||||
%alloca = alloca [4 x float], align 16, addrspace(5)
|
||||
%cast = bitcast [4 x float] addrspace(5)* %alloca to [4 x i32] addrspace(5)*
|
||||
store [4 x i32] [i32 1, i32 2, i32 3, i32 4], [4 x i32] addrspace(5)* %cast
|
||||
%load = load [4 x i32], [4 x i32] addrspace(5)* %cast, align 16
|
||||
store [4 x i32] %load, [4 x i32] addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; OPT-LABEL: @vector_bitcast_to_struct_from_alloca_array(
|
||||
; OPT-NOT: alloca
|
||||
; OPT: %out.repack = getelementptr inbounds %struct.v4, %struct.v4 addrspace(1)* %out, i64 0, i32 0
|
||||
; OPT-NEXT: store i32 1, i32 addrspace(1)* %out.repack, align 4
|
||||
; OPT-NEXT: %out.repack1 = getelementptr inbounds %struct.v4, %struct.v4 addrspace(1)* %out, i64 0, i32 1
|
||||
; OPT-NEXT: store i32 2, i32 addrspace(1)* %out.repack1, align 4
|
||||
; OPT-NEXT: %out.repack2 = getelementptr inbounds %struct.v4, %struct.v4 addrspace(1)* %out, i64 0, i32 2
|
||||
; OPT-NEXT: store i32 3, i32 addrspace(1)* %out.repack2, align 4
|
||||
; OPT-NEXT: %out.repack3 = getelementptr inbounds %struct.v4, %struct.v4 addrspace(1)* %out, i64 0, i32 3
|
||||
; OPT-NEXT: store i32 4, i32 addrspace(1)* %out.repack3, align 4
|
||||
|
||||
; GCN-LABEL: {{^}}vector_bitcast_to_struct_from_alloca_array:
|
||||
; GCN: v_mov_b32_e32 v0, 1
|
||||
; GCN: v_mov_b32_e32 v1, 2
|
||||
; GCN: v_mov_b32_e32 v2, 3
|
||||
; GCN: v_mov_b32_e32 v3, 4
|
||||
|
||||
; GCN: ScratchSize: 0
|
||||
|
||||
%struct.v4 = type { i32, i32, i32, i32 }
|
||||
|
||||
define amdgpu_kernel void @vector_bitcast_to_struct_from_alloca_array(%struct.v4 addrspace(1)* %out) {
|
||||
.entry:
|
||||
%alloca = alloca [4 x float], align 16, addrspace(5)
|
||||
%cast = bitcast [4 x float] addrspace(5)* %alloca to %struct.v4 addrspace(5)*
|
||||
store %struct.v4 { i32 1, i32 2, i32 3, i32 4 }, %struct.v4 addrspace(5)* %cast
|
||||
%load = load %struct.v4, %struct.v4 addrspace(5)* %cast, align 16
|
||||
store %struct.v4 %load, %struct.v4 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.lifetime.start.p5i8(i64 immarg, i8 addrspace(5)* nocapture)
|
||||
|
||||
declare void @llvm.lifetime.end.p5i8(i64 immarg, i8 addrspace(5)* nocapture)
|
||||
|
Loading…
Reference in New Issue
Block a user