IR: Have byref imply dereferenceable
The LangRef already states that it does, but this wasn't implemented. Also covers inalloca and preallocated, and helps fix a dependence on pointer element types.
commit 73fd87a05e (parent 8bd5d0338f)
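Not part of the commit: a minimal sketch of the observable effect of this change, using the era's two-argument Value::getPointerDereferenceableBytes (the later CanBeFreed parameter did not exist yet). The function @f and the standalone harness are illustrative only; after this commit the query should report 4 bytes for a byref(i32) argument, with CanBeNull left false.

// Sketch only: assumes the LLVM C++ API of this commit's era.
#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  SMDiagnostic Err;
  // Hypothetical input: a kernel-style byref(i32) argument.
  std::unique_ptr<Module> M = parseAssemblyString(
      "define void @f(i32 addrspace(4)* byref(i32) %in) { ret void }",
      Err, Ctx);
  Argument *A = &*M->getFunction("f")->arg_begin();
  bool CanBeNull = false;
  uint64_t Bytes =
      A->getPointerDereferenceableBytes(M->getDataLayout(), CanBeNull);
  // Expected after this commit: Bytes == 4 and CanBeNull == false.
  outs() << "dereferenceable bytes: " << Bytes << "\n";
}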
@@ -712,11 +712,16 @@ uint64_t Value::getPointerDereferenceableBytes(const DataLayout &DL,
   CanBeNull = false;
   if (const Argument *A = dyn_cast<Argument>(this)) {
     DerefBytes = A->getDereferenceableBytes();
-    if (DerefBytes == 0 && (A->hasByValAttr() || A->hasStructRetAttr())) {
-      Type *PT = cast<PointerType>(A->getType())->getElementType();
-      if (PT->isSized())
-        DerefBytes = DL.getTypeStoreSize(PT).getKnownMinSize();
+    if (DerefBytes == 0) {
+      // Handle byval/byref/inalloca/preallocated arguments
+      if (Type *ArgMemTy = A->getPointeeInMemoryValueType()) {
+        if (ArgMemTy->isSized()) {
+          // FIXME: Why isn't this the type alloc size?
+          DerefBytes = DL.getTypeStoreSize(ArgMemTy).getKnownMinSize();
+        }
+      }
     }
+
     if (DerefBytes == 0) {
       DerefBytes = A->getDereferenceableOrNullBytes();
       CanBeNull = true;
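For context on the hunk above: Argument::getPointeeInMemoryValueType() reports the in-memory value type carried by the byval, byref, inalloca, and preallocated attributes, which is why the rewritten branch covers all four where the old code special-cased byval and sret. A follow-on sketch to the harness above (same includes assumed; the function names are made up, and the expectation of 4 bytes per sample is an assumption consistent with the dereferenceable(4) results in the Attributor tests below):

// Sketch only: each of these arguments should now imply dereferenceable(4),
// because getPointeeInMemoryValueType() yields i32 for all four attributes.
static const char *Samples[] = {
    "define void @a(i32* byval(i32) %p) { ret void }",
    "define void @b(i32 addrspace(4)* byref(i32) %p) { ret void }",
    "define void @c(i32* inalloca %p) { ret void }", // inalloca was untyped in this era
    "define void @d(i32* preallocated(i32) %p) { ret void }",
};

static void checkSamples(LLVMContext &Ctx) {
  for (const char *Src : Samples) {
    SMDiagnostic Err;
    std::unique_ptr<Module> M = parseAssemblyString(Src, Err, Ctx);
    Argument *A = &*M->begin()->arg_begin();
    bool CanBeNull = false;
    // Expected to print 4 for each sample after this commit.
    outs() << A->getPointerDereferenceableBytes(M->getDataLayout(), CanBeNull)
           << "\n";
  }
}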
@@ -1277,7 +1277,7 @@ define amdgpu_kernel void @byref_constant_i8_arg(i32 addrspace(1)* nocapture %ou
 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from %ir.in.byref, addrspace 4)
+; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 1 from %ir.in.byref, addrspace 4)
 ; HSA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
 ; HSA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
 ; HSA-VI: S_ENDPGM 0
@@ -1290,7 +1290,7 @@ define amdgpu_kernel void @byref_constant_i8_arg(i32 addrspace(1)* nocapture %ou
 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from %ir.in.byref, addrspace 4)
+; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 1 from %ir.in.byref, addrspace 4)
 ; LEGACY-MESA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
 ; LEGACY-MESA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
 ; LEGACY-MESA-VI: S_ENDPGM 0
@@ -1310,7 +1310,7 @@ define amdgpu_kernel void @byref_constant_i16_arg(i32 addrspace(1)* nocapture %o
 ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-; HSA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from %ir.in.byref, addrspace 4)
+; HSA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 2 from %ir.in.byref, addrspace 4)
 ; HSA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
 ; HSA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
 ; HSA-VI: S_ENDPGM 0
@@ -1323,7 +1323,7 @@ define amdgpu_kernel void @byref_constant_i16_arg(i32 addrspace(1)* nocapture %o
 ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from %ir.in.byref, addrspace 4)
+; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 2 from %ir.in.byref, addrspace 4)
 ; LEGACY-MESA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
 ; LEGACY-MESA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
 ; LEGACY-MESA-VI: S_ENDPGM 0
@@ -1346,7 +1346,7 @@ define amdgpu_kernel void @byref_constant_i32_arg(i32 addrspace(1)* nocapture %o
 ; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
 ; HSA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, addrspace 4)
-; HSA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 4 from %ir.in.byref, addrspace 4)
+; HSA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 4 from %ir.in.byref, addrspace 4)
 ; HSA-VI: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
 ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
 ; HSA-VI: S_ENDPGM 0
@@ -1362,7 +1362,7 @@ define amdgpu_kernel void @byref_constant_i32_arg(i32 addrspace(1)* nocapture %o
 ; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
 ; LEGACY-MESA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, align 16, addrspace 4)
-; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 4 from %ir.in.byref, addrspace 4)
+; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 4 from %ir.in.byref, addrspace 4)
 ; LEGACY-MESA-VI: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
 ; LEGACY-MESA-VI: S_ENDPGM 0
@@ -1385,7 +1385,7 @@ define amdgpu_kernel void @byref_constant_v4i32_arg(<4 x i32> addrspace(1)* noca
 ; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
 ; HSA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, align 16, addrspace 4)
-; HSA-VI: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (load 16 from %ir.in.byref, addrspace 4)
+; HSA-VI: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 16 from %ir.in.byref, addrspace 4)
 ; HSA-VI: G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store 16 into %ir.out, align 4, addrspace 1)
 ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out.cast, addrspace 1)
 ; HSA-VI: S_ENDPGM 0
@@ -1401,7 +1401,7 @@ define amdgpu_kernel void @byref_constant_v4i32_arg(<4 x i32> addrspace(1)* noca
 ; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
 ; LEGACY-MESA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, addrspace 4)
-; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (load 16 from %ir.in.byref, addrspace 4)
+; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 16 from %ir.in.byref, addrspace 4)
 ; LEGACY-MESA-VI: G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store 16 into %ir.out, align 4, addrspace 1)
 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out.cast, addrspace 1)
 ; LEGACY-MESA-VI: S_ENDPGM 0
@@ -1425,7 +1425,7 @@ define amdgpu_kernel void @byref_align_constant_i32_arg(i32 addrspace(1)* nocapt
 ; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 260
 ; HSA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, addrspace 4)
-; HSA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 4 from %ir.in.byref, addrspace 4)
+; HSA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 4 from %ir.in.byref, addrspace 4)
 ; HSA-VI: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
 ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
 ; HSA-VI: S_ENDPGM 0
@@ -1441,7 +1441,7 @@ define amdgpu_kernel void @byref_align_constant_i32_arg(i32 addrspace(1)* nocapt
 ; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 296
 ; LEGACY-MESA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, align 8, addrspace 4)
-; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 4 from %ir.in.byref, addrspace 4)
+; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 4 from %ir.in.byref, addrspace 4)
 ; LEGACY-MESA-VI: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
 ; LEGACY-MESA-VI: S_ENDPGM 0
@@ -1464,7 +1464,7 @@ define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(i32 addrspace
 ; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
 ; HSA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, align 16, addrspace 4)
-; HSA-VI: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (load 64 from %ir.in.byref, addrspace 4)
+; HSA-VI: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 64 from %ir.in.byref, addrspace 4)
 ; HSA-VI: G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store 64 into %ir.cast.out, align 4, addrspace 1)
 ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
 ; HSA-VI: S_ENDPGM 0
@@ -1480,7 +1480,7 @@ define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(i32 addrspace
 ; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 164
 ; LEGACY-MESA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, addrspace 4)
-; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (load 64 from %ir.in.byref, addrspace 4)
+; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 64 from %ir.in.byref, addrspace 4)
 ; LEGACY-MESA-VI: G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store 64 into %ir.cast.out, align 4, addrspace 1)
 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
 ; LEGACY-MESA-VI: S_ENDPGM 0
@@ -1504,7 +1504,7 @@ define amdgpu_kernel void @byref_global_i32_arg(i32 addrspace(1)* nocapture %out
 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
 ; HSA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p1) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
 ; HSA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[ADDRSPACE_CAST]](p1)
-; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (load 4 from %ir.1, addrspace 1)
+; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (dereferenceable load 4 from %ir.1, addrspace 1)
 ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
 ; HSA-VI: S_ENDPGM 0
 ; LEGACY-MESA-VI-LABEL: name: byref_global_i32_arg
@@ -1518,7 +1518,7 @@ define amdgpu_kernel void @byref_global_i32_arg(i32 addrspace(1)* nocapture %out
 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
 ; LEGACY-MESA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p1) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
 ; LEGACY-MESA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[ADDRSPACE_CAST]](p1)
-; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (load 4 from %ir.1, addrspace 1)
+; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (dereferenceable load 4 from %ir.1, addrspace 1)
 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
 ; LEGACY-MESA-VI: S_ENDPGM 0
 %in = load i32, i32 addrspace(1)* %in.byref
@@ -1537,7 +1537,7 @@ define amdgpu_kernel void @byref_flat_i32_arg(i32 addrspace(1)* nocapture %out,
 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
 ; HSA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
-; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (load 4 from %ir.in.byref)
+; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load 4 from %ir.in.byref)
 ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
 ; HSA-VI: S_ENDPGM 0
 ; LEGACY-MESA-VI-LABEL: name: byref_flat_i32_arg
@@ -1550,7 +1550,7 @@ define amdgpu_kernel void @byref_flat_i32_arg(i32 addrspace(1)* nocapture %out,
 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
 ; LEGACY-MESA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
-; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (load 4 from %ir.in.byref)
+; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load 4 from %ir.in.byref)
 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
 ; LEGACY-MESA-VI: S_ENDPGM 0
 %in = load i32, i32* %in.byref
@@ -1569,7 +1569,7 @@ define amdgpu_kernel void @byref_constant_32bit_i32_arg(i32 addrspace(1)* nocapt
 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
 ; HSA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p6) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
-; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (load 4 from %ir.in.byref, addrspace 6)
+; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable load 4 from %ir.in.byref, addrspace 6)
 ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
 ; HSA-VI: S_ENDPGM 0
 ; LEGACY-MESA-VI-LABEL: name: byref_constant_32bit_i32_arg
@@ -1582,7 +1582,7 @@ define amdgpu_kernel void @byref_constant_32bit_i32_arg(i32 addrspace(1)* nocapt
 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
 ; LEGACY-MESA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p6) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
-; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (load 4 from %ir.in.byref, addrspace 6)
+; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable load 4 from %ir.in.byref, addrspace 6)
 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
 ; LEGACY-MESA-VI: S_ENDPGM 0
 %in = load i32, i32 addrspace(6)* %in.byref
@@ -1601,7 +1601,7 @@ define amdgpu_kernel void @byref_unknown_as_i32_arg(i32 addrspace(1)* nocapture
 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
 ; HSA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p999) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
-; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (load 4 from %ir.in.byref, addrspace 999)
+; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load 4 from %ir.in.byref, addrspace 999)
 ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
 ; HSA-VI: S_ENDPGM 0
 ; LEGACY-MESA-VI-LABEL: name: byref_unknown_as_i32_arg
@@ -1614,7 +1614,7 @@ define amdgpu_kernel void @byref_unknown_as_i32_arg(i32 addrspace(1)* nocapture
 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
 ; LEGACY-MESA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p999) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
-; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (load 4 from %ir.in.byref, addrspace 999)
+; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load 4 from %ir.in.byref, addrspace 999)
 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
 ; LEGACY-MESA-VI: S_ENDPGM 0
 %in = load i32, i32 addrspace(999)* %in.byref
@@ -1634,7 +1634,7 @@ define amdgpu_kernel void @byref_local_i32_arg(i32 addrspace(1)* nocapture %out,
 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
 ; HSA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p3) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
-; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (load 4 from %ir.in.byref, addrspace 3)
+; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load 4 from %ir.in.byref, addrspace 3)
 ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
 ; HSA-VI: S_ENDPGM 0
 ; LEGACY-MESA-VI-LABEL: name: byref_local_i32_arg
@@ -1647,7 +1647,7 @@ define amdgpu_kernel void @byref_local_i32_arg(i32 addrspace(1)* nocapture %out,
 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
 ; LEGACY-MESA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p3) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
-; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (load 4 from %ir.in.byref, addrspace 3)
+; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load 4 from %ir.in.byref, addrspace 3)
 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
 ; LEGACY-MESA-VI: S_ENDPGM 0
 %in = load i32, i32 addrspace(3)* %in.byref
@@ -1670,8 +1670,8 @@ define amdgpu_kernel void @multi_byref_constant_i32_arg(i32 addrspace(1)* nocapt
 ; HSA-VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; HSA-VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
 ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load 4, align 16, addrspace 4)
-; HSA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 4 from %ir.in0.byref, addrspace 4)
-; HSA-VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 4 from %ir.in1.byref, addrspace 4)
+; HSA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 4 from %ir.in0.byref, addrspace 4)
+; HSA-VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable load 4 from %ir.in1.byref, addrspace 4)
 ; HSA-VI: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
 ; HSA-VI: G_STORE [[LOAD3]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
 ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
@@ -1690,8 +1690,8 @@ define amdgpu_kernel void @multi_byref_constant_i32_arg(i32 addrspace(1)* nocapt
 ; LEGACY-MESA-VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
 ; LEGACY-MESA-VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
 ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load 4, addrspace 4)
-; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 4 from %ir.in0.byref, addrspace 4)
-; LEGACY-MESA-VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 4 from %ir.in1.byref, addrspace 4)
+; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 4 from %ir.in0.byref, addrspace 4)
+; LEGACY-MESA-VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable load 4 from %ir.in1.byref, addrspace 4)
 ; LEGACY-MESA-VI: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
 ; LEGACY-MESA-VI: G_STORE [[LOAD3]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
 ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
@@ -1712,7 +1712,7 @@ define amdgpu_kernel void @byref_constant_i32_arg_offset0(i32 addrspace(4)* byre
 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
 ; HSA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-; HSA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 4 from %ir.in.byref, addrspace 4)
+; HSA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable load 4 from %ir.in.byref, addrspace 4)
 ; HSA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
 ; HSA-VI: S_ENDPGM 0
 ; LEGACY-MESA-VI-LABEL: name: byref_constant_i32_arg_offset0
@@ -1722,7 +1722,7 @@ define amdgpu_kernel void @byref_constant_i32_arg_offset0(i32 addrspace(4)* byre
 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
 ; LEGACY-MESA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 4 from %ir.in.byref, addrspace 4)
+; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable load 4 from %ir.in.byref, addrspace 4)
 ; LEGACY-MESA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
 ; LEGACY-MESA-VI: S_ENDPGM 0
 %in = load i32, i32 addrspace(4)* %in.byref
@@ -106,12 +106,12 @@ define void @test6_2(i8** %p, i8* %q) {
 define void @test7_1(i32* inalloca %a) {
 ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@test7_1
-; IS__TUNIT____-SAME: (i32* inalloca nocapture nofree writeonly [[A:%.*]]) [[ATTR1]] {
+; IS__TUNIT____-SAME: (i32* inalloca nocapture nofree nonnull writeonly dereferenceable(4) [[A:%.*]]) [[ATTR1]] {
 ; IS__TUNIT____-NEXT: ret void
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@test7_1
-; IS__CGSCC____-SAME: (i32* inalloca nocapture nofree writeonly [[A:%.*]]) [[ATTR1]] {
+; IS__CGSCC____-SAME: (i32* inalloca nocapture nofree nonnull writeonly dereferenceable(4) [[A:%.*]]) [[ATTR1]] {
 ; IS__CGSCC____-NEXT: ret void
 ;
 ret void
@@ -163,11 +163,17 @@ declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>%val, <4 x i32*>, i32,
 ; CHECK-NOT: readnone
 ; CHECK-NOT: readonly
 define void @test9(<4 x i32*> %ptrs, <4 x i32>%val) {
-; CHECK: Function Attrs: nounwind willreturn
-; CHECK-LABEL: define {{[^@]+}}@test9
-; CHECK-SAME: (<4 x i32*> [[PTRS:%.*]], <4 x i32> [[VAL:%.*]]) [[ATTR4:#.*]] {
-; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> [[VAL]], <4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef <i1 true, i1 false, i1 true, i1 false>) [[ATTR11:#.*]]
-; CHECK-NEXT: ret void
+; IS__TUNIT____: Function Attrs: nounwind willreturn writeonly
+; IS__TUNIT____-LABEL: define {{[^@]+}}@test9
+; IS__TUNIT____-SAME: (<4 x i32*> [[PTRS:%.*]], <4 x i32> [[VAL:%.*]]) [[ATTR4:#.*]] {
+; IS__TUNIT____-NEXT: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> [[VAL]], <4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef <i1 true, i1 false, i1 true, i1 false>) [[ATTR11:#.*]]
+; IS__TUNIT____-NEXT: ret void
+;
+; IS__CGSCC____: Function Attrs: nounwind willreturn writeonly
+; IS__CGSCC____-LABEL: define {{[^@]+}}@test9
+; IS__CGSCC____-SAME: (<4 x i32*> [[PTRS:%.*]], <4 x i32> [[VAL:%.*]]) [[ATTR4:#.*]] {
+; IS__CGSCC____-NEXT: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> [[VAL]], <4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef <i1 true, i1 false, i1 true, i1 false>) [[ATTR12:#.*]]
+; IS__CGSCC____-NEXT: ret void
 ;
 call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>%val, <4 x i32*> %ptrs, i32 4, <4 x i1><i1 true, i1 false, i1 true, i1 false>)
 ret void
@@ -176,11 +182,17 @@ define void @test9(<4 x i32*> %ptrs, <4 x i32>%val) {
 ; CHECK: declare <4 x i32> @llvm.masked.gather
 declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
 define <4 x i32> @test10(<4 x i32*> %ptrs) {
-; CHECK: Function Attrs: nounwind readonly willreturn
-; CHECK-LABEL: define {{[^@]+}}@test10
-; CHECK-SAME: (<4 x i32*> [[PTRS:%.*]]) [[ATTR5:#.*]] {
-; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef <i1 true, i1 false, i1 true, i1 false>, <4 x i32> undef) [[ATTR12:#.*]]
-; CHECK-NEXT: ret <4 x i32> [[RES]]
+; IS__TUNIT____: Function Attrs: nounwind readonly willreturn
+; IS__TUNIT____-LABEL: define {{[^@]+}}@test10
+; IS__TUNIT____-SAME: (<4 x i32*> [[PTRS:%.*]]) [[ATTR5:#.*]] {
+; IS__TUNIT____-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef <i1 true, i1 false, i1 true, i1 false>, <4 x i32> undef) [[ATTR12:#.*]]
+; IS__TUNIT____-NEXT: ret <4 x i32> [[RES]]
+;
+; IS__CGSCC____: Function Attrs: nounwind readonly willreturn
+; IS__CGSCC____-LABEL: define {{[^@]+}}@test10
+; IS__CGSCC____-SAME: (<4 x i32*> [[PTRS:%.*]]) [[ATTR5:#.*]] {
+; IS__CGSCC____-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef <i1 true, i1 false, i1 true, i1 false>, <4 x i32> undef) [[ATTR13:#.*]]
+; IS__CGSCC____-NEXT: ret <4 x i32> [[RES]]
 ;
 %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1><i1 true, i1 false, i1 true, i1 false>, <4 x i32>undef)
 ret <4 x i32> %res
@@ -202,11 +214,17 @@ define <4 x i32> @test11_2(<4 x i32*> %ptrs) {
 declare <4 x i32> @test12_1(<4 x i32*>) argmemonly nounwind
 ; CHECK-NOT: readnone
 define <4 x i32> @test12_2(<4 x i32*> %ptrs) {
-; CHECK: Function Attrs: argmemonly nounwind
-; CHECK-LABEL: define {{[^@]+}}@test12_2
-; CHECK-SAME: (<4 x i32*> [[PTRS:%.*]]) [[ATTR7:#.*]] {
-; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @test12_1(<4 x i32*> [[PTRS]]) [[ATTR13:#.*]]
-; CHECK-NEXT: ret <4 x i32> [[RES]]
+; IS__TUNIT____: Function Attrs: argmemonly nounwind
+; IS__TUNIT____-LABEL: define {{[^@]+}}@test12_2
+; IS__TUNIT____-SAME: (<4 x i32*> [[PTRS:%.*]]) [[ATTR7:#.*]] {
+; IS__TUNIT____-NEXT: [[RES:%.*]] = call <4 x i32> @test12_1(<4 x i32*> [[PTRS]]) [[ATTR13:#.*]]
+; IS__TUNIT____-NEXT: ret <4 x i32> [[RES]]
+;
+; IS__CGSCC____: Function Attrs: argmemonly nounwind
+; IS__CGSCC____-LABEL: define {{[^@]+}}@test12_2
+; IS__CGSCC____-SAME: (<4 x i32*> [[PTRS:%.*]]) [[ATTR7:#.*]] {
+; IS__CGSCC____-NEXT: [[RES:%.*]] = call <4 x i32> @test12_1(<4 x i32*> [[PTRS]]) [[ATTR14:#.*]]
+; IS__CGSCC____-NEXT: ret <4 x i32> [[RES]]
 ;
 %res = call <4 x i32> @test12_1(<4 x i32*> %ptrs)
 ret <4 x i32> %res
@@ -332,48 +332,48 @@ define i32 @ipccp3() {
 define internal i32* @test_inalloca(i32* inalloca %a) {
 ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@test_inalloca
-; IS__TUNIT____-SAME: (i32* inalloca noalias nofree noundef returned writeonly align 536870912 "no-capture-maybe-returned" [[A:%.*]]) [[ATTR1]] {
+; IS__TUNIT____-SAME: (i32* inalloca noalias nofree nonnull returned writeonly dereferenceable(4) "no-capture-maybe-returned" [[A:%.*]]) [[ATTR1]] {
 ; IS__TUNIT____-NEXT: ret i32* [[A]]
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@test_inalloca
-; IS__CGSCC____-SAME: (i32* inalloca noalias nofree noundef returned writeonly align 536870912 "no-capture-maybe-returned" [[A:%.*]]) [[ATTR1]] {
+; IS__CGSCC____-SAME: (i32* inalloca noalias nofree nonnull returned writeonly dereferenceable(4) "no-capture-maybe-returned" [[A:%.*]]) [[ATTR1]] {
 ; IS__CGSCC____-NEXT: ret i32* [[A]]
 ;
 ret i32* %a
 }
-define i32* @complicated_args_inalloca() {
+define i32* @complicated_args_inalloca(i32* %arg) {
 ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@complicated_args_inalloca
-; IS__TUNIT____-SAME: () [[ATTR1]] {
-; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32* @test_inalloca(i32* noalias nocapture nofree noundef writeonly align 536870912 null) [[ATTR1]]
+; IS__TUNIT____-SAME: (i32* nofree readnone returned "no-capture-maybe-returned" [[ARG:%.*]]) [[ATTR1]] {
+; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32* @test_inalloca(i32* noalias nofree writeonly "no-capture-maybe-returned" [[ARG]]) [[ATTR1]]
 ; IS__TUNIT____-NEXT: ret i32* [[CALL]]
 ;
 ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@complicated_args_inalloca
-; IS__CGSCC_OPM-SAME: () [[ATTR1:#.*]] {
-; IS__CGSCC_OPM-NEXT: [[CALL:%.*]] = call i32* @test_inalloca(i32* noalias nocapture nofree noundef writeonly align 536870912 null) [[ATTR5:#.*]]
+; IS__CGSCC_OPM-SAME: (i32* nofree nonnull readnone returned dereferenceable(4) "no-capture-maybe-returned" [[ARG:%.*]]) [[ATTR1:#.*]] {
+; IS__CGSCC_OPM-NEXT: [[CALL:%.*]] = call i32* @test_inalloca(i32* noalias nofree nonnull writeonly dereferenceable(4) "no-capture-maybe-returned" [[ARG]]) [[ATTR5:#.*]]
 ; IS__CGSCC_OPM-NEXT: ret i32* [[CALL]]
 ;
 ; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@complicated_args_inalloca
-; IS__CGSCC_NPM-SAME: () [[ATTR1:#.*]] {
-; IS__CGSCC_NPM-NEXT: [[CALL:%.*]] = call i32* @test_inalloca(i32* noalias nocapture nofree noundef writeonly align 536870912 null) [[ATTR4:#.*]]
+; IS__CGSCC_NPM-SAME: (i32* nofree nonnull readnone returned dereferenceable(4) "no-capture-maybe-returned" [[ARG:%.*]]) [[ATTR1:#.*]] {
+; IS__CGSCC_NPM-NEXT: [[CALL:%.*]] = call i32* @test_inalloca(i32* noalias nofree nonnull writeonly dereferenceable(4) "no-capture-maybe-returned" [[ARG]]) [[ATTR4:#.*]]
 ; IS__CGSCC_NPM-NEXT: ret i32* [[CALL]]
 ;
-%call = call i32* @test_inalloca(i32* null)
+%call = call i32* @test_inalloca(i32* %arg)
 ret i32* %call
 }

 define internal i32* @test_preallocated(i32* preallocated(i32) %a) {
 ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@test_preallocated
-; IS__TUNIT____-SAME: (i32* noalias nofree noundef returned writeonly preallocated(i32) align 536870912 "no-capture-maybe-returned" [[A:%.*]]) [[ATTR1]] {
+; IS__TUNIT____-SAME: (i32* noalias nofree noundef nonnull returned writeonly preallocated(i32) align 536870912 dereferenceable(4) "no-capture-maybe-returned" [[A:%.*]]) [[ATTR1]] {
 ; IS__TUNIT____-NEXT: ret i32* [[A]]
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@test_preallocated
-; IS__CGSCC____-SAME: (i32* noalias nofree noundef returned writeonly preallocated(i32) align 536870912 "no-capture-maybe-returned" [[A:%.*]]) [[ATTR1]] {
+; IS__CGSCC____-SAME: (i32* noalias nofree noundef nonnull returned writeonly preallocated(i32) align 536870912 dereferenceable(4) "no-capture-maybe-returned" [[A:%.*]]) [[ATTR1]] {
 ; IS__CGSCC____-NEXT: ret i32* [[A]]
 ;
 ret i32* %a
@@ -23,6 +23,47 @@ define i8 @memcpy_constant_arg_ptr_to_alloca([32 x i8] addrspace(4)* noalias rea
 ret i8 %load
 }

+; Simple memcpy to alloca from byref constant address space argument.
+define amdgpu_kernel void @memcpy_constant_byref_arg_ptr_to_alloca([32 x i8] addrspace(4)* noalias readonly align 4 byref([32 x i8]) %arg, i8 addrspace(1)* %out, i32 %idx) {
+; CHECK-LABEL: @memcpy_constant_byref_arg_ptr_to_alloca(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[IDX:%.*]] to i64
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr [32 x i8], [32 x i8] addrspace(4)* [[ARG:%.*]], i64 0, i64 [[TMP1]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, i8 addrspace(4)* [[GEP]], align 1
+; CHECK-NEXT: store i8 [[LOAD]], i8 addrspace(1)* [[OUT:%.*]], align 1
+; CHECK-NEXT: ret void
+;
+%alloca = alloca [32 x i8], align 4, addrspace(5)
+%alloca.cast = bitcast [32 x i8] addrspace(5)* %alloca to i8 addrspace(5)*
+%arg.cast = bitcast [32 x i8] addrspace(4)* %arg to i8 addrspace(4)*
+call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* %alloca.cast, i8 addrspace(4)* %arg.cast, i64 32, i1 false)
+%gep = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %alloca, i32 0, i32 %idx
+%load = load i8, i8 addrspace(5)* %gep
+store i8 %load, i8 addrspace(1)* %out
+ret void
+}
+
+; Simple memcpy to alloca from byref constant address space argument, but not enough bytes are dereferenceable
+define amdgpu_kernel void @memcpy_constant_byref_arg_ptr_to_alloca_too_many_bytes([31 x i8] addrspace(4)* noalias readonly align 4 byref([31 x i8]) %arg, i8 addrspace(1)* %out, i32 %idx) {
+; CHECK-LABEL: @memcpy_constant_byref_arg_ptr_to_alloca_too_many_bytes(
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [32 x i8], align 4, addrspace(5)
+; CHECK-NEXT: [[ALLOCA_CAST:%.*]] = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* [[ALLOCA]], i32 0, i32 0
+; CHECK-NEXT: [[ARG_CAST:%.*]] = getelementptr inbounds [31 x i8], [31 x i8] addrspace(4)* [[ARG:%.*]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 4 dereferenceable(31) [[ALLOCA_CAST]], i8 addrspace(4)* align 4 dereferenceable(31) [[ARG_CAST]], i64 31, i1 false)
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* [[ALLOCA]], i32 0, i32 [[IDX:%.*]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, i8 addrspace(5)* [[GEP]], align 1
+; CHECK-NEXT: store i8 [[LOAD]], i8 addrspace(1)* [[OUT:%.*]], align 1
+; CHECK-NEXT: ret void
+;
+%alloca = alloca [32 x i8], align 4, addrspace(5)
+%alloca.cast = bitcast [32 x i8] addrspace(5)* %alloca to i8 addrspace(5)*
+%arg.cast = bitcast [31 x i8] addrspace(4)* %arg to i8 addrspace(4)*
+call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* %alloca.cast, i8 addrspace(4)* %arg.cast, i64 31, i1 false)
+%gep = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %alloca, i32 0, i32 %idx
+%load = load i8, i8 addrspace(5)* %gep
+store i8 %load, i8 addrspace(1)* %out
+ret void
+}
+
 ; Simple memcpy to alloca from constant address space intrinsic call
 define amdgpu_kernel void @memcpy_constant_intrinsic_ptr_to_alloca(i8 addrspace(1)* %out, i32 %idx) {
 ; CHECK-LABEL: @memcpy_constant_intrinsic_ptr_to_alloca(
@@ -44,18 +85,18 @@ define amdgpu_kernel void @memcpy_constant_intrinsic_ptr_to_alloca(i8 addrspace(
 }

 ; Alloca is written through a flat pointer
-define i8 @memcpy_constant_arg_ptr_to_alloca_addrspacecast_to_flat([32 x i8] addrspace(4)* noalias readonly align 4 dereferenceable(32) %arg, i32 %idx) {
+define i8 @memcpy_constant_arg_ptr_to_alloca_addrspacecast_to_flat([31 x i8] addrspace(4)* noalias readonly align 4 dereferenceable(32) %arg, i32 %idx) {
 ; CHECK-LABEL: @memcpy_constant_arg_ptr_to_alloca_addrspacecast_to_flat(
 ; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[IDX:%.*]] to i64
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr [32 x i8], [32 x i8] addrspace(4)* [[ARG:%.*]], i64 0, i64 [[TMP1]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr [31 x i8], [31 x i8] addrspace(4)* [[ARG:%.*]], i64 0, i64 [[TMP1]]
 ; CHECK-NEXT: [[LOAD:%.*]] = load i8, i8 addrspace(4)* [[GEP]], align 1
 ; CHECK-NEXT: ret i8 [[LOAD]]
 ;
 %alloca = alloca [32 x i8], align 4, addrspace(5)
 %alloca.cast = bitcast [32 x i8] addrspace(5)* %alloca to i8 addrspace(5)*
 %alloca.cast.asc = addrspacecast i8 addrspace(5)* %alloca.cast to i8*
-%arg.cast = bitcast [32 x i8] addrspace(4)* %arg to i8 addrspace(4)*
-call void @llvm.memcpy.p0i8.p4i8.i64(i8* %alloca.cast.asc, i8 addrspace(4)* %arg.cast, i64 32, i1 false)
+%arg.cast = bitcast [31 x i8] addrspace(4)* %arg to i8 addrspace(4)*
+call void @llvm.memcpy.p0i8.p4i8.i64(i8* %alloca.cast.asc, i8 addrspace(4)* %arg.cast, i64 31, i1 false)
 %gep = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %alloca, i32 0, i32 %idx
 %load = load i8, i8 addrspace(5)* %gep
 ret i8 %load