GlobalISel: Preserve memory types for implicit sret load/stores
commit a689a41a7a
parent 2fbf734dc0
@@ -791,7 +791,7 @@ void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy,
     Register Addr;
     MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]);
     auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
-                                        MRI.getType(VRegs[I]).getSizeInBytes(),
+                                        MRI.getType(VRegs[I]),
                                         commonAlignment(BaseAlign, Offsets[I]));
     MIRBuilder.buildLoad(VRegs[I], Addr, *MMO);
   }
@@ -822,7 +822,7 @@ void CallLowering::insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy,
     Register Addr;
     MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]);
     auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
-                                        MRI.getType(VRegs[I]).getSizeInBytes(),
+                                        MRI.getType(VRegs[I]),
                                         commonAlignment(BaseAlign, Offsets[I]));
     MIRBuilder.buildStore(VRegs[I], Addr, *MMO);
   }
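The functional change is only the overload switch: MachineFunction::getMachineMemOperand now receives the value's LLT instead of its size in bytes, so the MachineMemOperand carries the memory type rather than a raw bit count. A minimal sketch of the two call shapes, assuming the same names as in the hunks above (the helper itself is hypothetical, not part of the patch):

    // Hypothetical helper illustrating the overload change; not in the patch.
    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/CodeGen/MachineMemOperand.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"
    #include "llvm/Support/Alignment.h"
    #include "llvm/Support/LowLevelTypeImpl.h"

    using namespace llvm;

    static MachineMemOperand *buildSRetMMO(MachineFunction &MF,
                                           MachineRegisterInfo &MRI,
                                           MachinePointerInfo PtrInfo,
                                           Register ValReg, Align BaseAlign,
                                           int64_t Offset) {
      LLT MemTy = MRI.getType(ValReg);

      // Before: size-only overload. The MMO records only "N bytes", so a
      // <33 x s32> access prints as (load (s1056)) and pointer types degrade
      // to plain scalars (p1 -> s64).
      //   return MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
      //                                  MemTy.getSizeInBytes(),
      //                                  commonAlignment(BaseAlign, Offset));

      // After: LLT overload. The MMO keeps the type itself, so the same
      // access prints as (load (<33 x s32>)).
      return MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, MemTy,
                                     commonAlignment(BaseAlign, Offset));
    }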
@@ -1143,7 +1143,7 @@ define <33 x i32> @v33i32_func_void() #0 {
   ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
   ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<33 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4)
   ; CHECK: [[LOAD1:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<33 x s32>) from %ir.ptr, align 256, addrspace 1)
-  ; CHECK: G_STORE [[LOAD1]](<33 x s32>), [[COPY]](p5) :: (store (s1056), align 256, addrspace 5)
+  ; CHECK: G_STORE [[LOAD1]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5)
   ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
   ; CHECK: S_SETPC_B64_return [[COPY2]]
   %ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(4)* undef
@@ -1167,7 +1167,7 @@ define <33 x i32> @v33i32_func_v33i32_i32(<33 x i32> addrspace(1)* %p, i32 %idx)
   ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[MUL]](s64)
   ; CHECK: [[COPY5:%[0-9]+]]:_(p1) = COPY [[PTR_ADD]](p1)
   ; CHECK: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[COPY5]](p1) :: (load (<33 x s32>) from %ir.gep, align 256, addrspace 1)
-  ; CHECK: G_STORE [[LOAD]](<33 x s32>), [[COPY]](p5) :: (store (s1056), align 256, addrspace 5)
+  ; CHECK: G_STORE [[LOAD]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5)
   ; CHECK: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
   ; CHECK: S_SETPC_B64_return [[COPY6]]
   %gep = getelementptr inbounds <33 x i32>, <33 x i32> addrspace(1)* %p, i32 %idx
@@ -1187,7 +1187,7 @@ define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 {
   ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
   ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64)
   ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr + 128, align 128, addrspace 1)
-  ; CHECK: G_STORE [[LOAD1]](<32 x s32>), [[COPY]](p5) :: (store (s1024), addrspace 5)
+  ; CHECK: G_STORE [[LOAD1]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5)
   ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
   ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
   ; CHECK: G_STORE [[LOAD2]](s32), [[PTR_ADD1]](p5) :: (store (s32), align 128, addrspace 5)
@@ -1213,7 +1213,7 @@ define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 {
   ; CHECK: G_STORE [[LOAD1]](s32), [[COPY]](p5) :: (store (s32), align 128, addrspace 5)
   ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
   ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-  ; CHECK: G_STORE [[LOAD2]](<32 x s32>), [[PTR_ADD1]](p5) :: (store (s1024), addrspace 5)
+  ; CHECK: G_STORE [[LOAD2]](<32 x s32>), [[PTR_ADD1]](p5) :: (store (<32 x s32>), addrspace 5)
   ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
   ; CHECK: S_SETPC_B64_return [[COPY2]]
   %ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(4)* undef
@@ -1490,16 +1490,16 @@ define %struct.with.ptrs @ptr_in_struct_func_void() #0 {
   ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 144
   ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64)
   ; CHECK: [[LOAD3:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD2]](p1) :: (volatile load (<2 x p1>) from `%struct.with.ptrs addrspace(1)* undef` + 144, addrspace 1)
-  ; CHECK: G_STORE [[LOAD]](<32 x s32>), [[COPY]](p5) :: (store (s1024), addrspace 5)
+  ; CHECK: G_STORE [[LOAD]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5)
   ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
   ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-  ; CHECK: G_STORE [[LOAD1]](p3), [[PTR_ADD3]](p5) :: (store (s32), align 128, addrspace 5)
+  ; CHECK: G_STORE [[LOAD1]](p3), [[PTR_ADD3]](p5) :: (store (p3), align 128, addrspace 5)
   ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 136
   ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-  ; CHECK: G_STORE [[LOAD2]](p1), [[PTR_ADD4]](p5) :: (store (s64), addrspace 5)
+  ; CHECK: G_STORE [[LOAD2]](p1), [[PTR_ADD4]](p5) :: (store (p1), addrspace 5)
   ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 144
   ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-  ; CHECK: G_STORE [[LOAD3]](<2 x p1>), [[PTR_ADD5]](p5) :: (store (s128), addrspace 5)
+  ; CHECK: G_STORE [[LOAD3]](<2 x p1>), [[PTR_ADD5]](p5) :: (store (<2 x p1>), addrspace 5)
   ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
   ; CHECK: S_SETPC_B64_return [[COPY2]]
   %val = load volatile %struct.with.ptrs, %struct.with.ptrs addrspace(1)* undef
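Every test update follows the same mechanical pattern: the old raw-scalar spelling of the MMO is replaced by the typed LLT of the stored value, at identical bit widths: s1056 becomes <33 x s32> (33 x 32 bits), s1024 becomes <32 x s32>, and the pointer stores keep their pointer types (s32 -> p3, s64 -> p1, s128 -> <2 x p1>). A standalone sanity sketch (not from the patch; header name as of LLVM 13-16) confirming the widths match:

    // Standalone check that the old raw-size and new typed spellings
    // describe the same number of bits. Illustration only.
    #include "llvm/Support/LowLevelTypeImpl.h"
    #include <cassert>

    using namespace llvm;

    int main() {
      assert(LLT::fixed_vector(33, 32).getSizeInBits() ==
             LLT::scalar(1056).getSizeInBits());          // <33 x s32> == s1056
      assert(LLT::fixed_vector(32, 32).getSizeInBits() ==
             LLT::scalar(1024).getSizeInBits());          // <32 x s32> == s1024
      assert(LLT::fixed_vector(2, LLT::pointer(1, 64)).getSizeInBits() ==
             LLT::scalar(128).getSizeInBits());           // <2 x p1> == s128
      return 0;
    }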
@@ -2684,7 +2684,7 @@ define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 {
   ; GCN: $vgpr31 = COPY [[OR1]](s32)
   ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v32i32_i32_func_void, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
   ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (s1024) from %stack.0, addrspace 5)
+  ; GCN: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<32 x s32>) from %stack.0, addrspace 5)
   ; GCN: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
   ; GCN: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](s32)
   ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from %stack.0, align 128, addrspace 5)
@@ -2752,7 +2752,7 @@ define amdgpu_kernel void @test_call_external_i32_v32i32_func_void() #0 {
   ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (load (s32) from %stack.0, align 128, addrspace 5)
   ; GCN: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
   ; GCN: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](s32)
-  ; GCN: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[PTR_ADD1]](p5) :: (load (s1024) from %stack.0, addrspace 5)
+  ; GCN: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[PTR_ADD1]](p5) :: (load (<32 x s32>) from %stack.0, addrspace 5)
   ; GCN: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
   ; GCN: G_STORE [[LOAD1]](<32 x s32>), [[COPY10]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, align 8, addrspace 1)
   ; GCN: S_ENDPGM 0
@@ -2813,7 +2813,7 @@ define amdgpu_kernel void @test_call_external_v33i32_func_void() #0 {
   ; GCN: $vgpr31 = COPY [[OR1]](s32)
   ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v33i32_func_void, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
   ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (s1056) from %stack.0, align 256, addrspace 5)
+  ; GCN: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<33 x s32>) from %stack.0, align 256, addrspace 5)
   ; GCN: G_STORE [[LOAD]](<33 x s32>), [[DEF]](p1) :: (volatile store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 8, addrspace 1)
   ; GCN: S_ENDPGM 0
   %val = call <33 x i32> @external_v33i32_func_void()
@@ -2879,7 +2879,7 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(<33 x i32>
   ; GCN: $vgpr31 = COPY [[OR1]](s32)
   ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v33i32_func_v33i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
   ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN: [[LOAD2:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (s1056) from %stack.0, align 256, addrspace 5)
+  ; GCN: [[LOAD2:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<33 x s32>) from %stack.0, align 256, addrspace 5)
   ; GCN: G_STORE [[LOAD2]](<33 x s32>), [[DEF]](p1) :: (volatile store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 8, addrspace 1)
   ; GCN: S_ENDPGM 0
   %val = call <33 x i32> @external_v33i32_func_v33i32_i32(<33 x i32> addrspace(1)* %p, i32 %idx)