Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve --asm-verbose=false < %s | FileCheck %s
|
|
|
|
|
|
|
|
; PRFB <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element, imm = 0, 1, ..., 31
|
2020-03-19 10:35:31 +01:00
|
|
|
define void @llvm_aarch64_sve_prfb_gather_nx4vi32_runtime_offset(<vscale x 4 x i32> %bases, i64 %imm, <vscale x 4 x i1> %Pg) nounwind {
|
|
|
|
; CHECK-LABEL: llvm_aarch64_sve_prfb_gather_nx4vi32_runtime_offset:
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and relative codegen has been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases,int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
; CHECK-NEXT: prfb pldl1strm, p0, [x0, z0.s, uxtw]
|
|
|
|
; CHECK-NEXT: ret
|
2020-03-19 10:35:31 +01:00
|
|
|
call void @llvm.aarch64.sve.prfb.gather.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 %imm, i32 1)
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and relative codegen has been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases,int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2020-03-19 10:35:31 +01:00
|
|
|
define void @llvm_aarch64_sve_prfb_gather_nx4vi32_invalid_immediate_offset_upper_bound(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
|
|
|
|
; CHECK-LABEL: llvm_aarch64_sve_prfb_gather_nx4vi32_invalid_immediate_offset_upper_bound:
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and relative codegen has been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases,int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
; CHECK-NEXT: mov w[[N:[0-9]+]], #32
|
|
|
|
; CHECK-NEXT: prfb pldl1strm, p0, [x[[N]], z0.s, uxtw]
|
|
|
|
; CHECK-NEXT: ret
|
2020-03-19 10:35:31 +01:00
|
|
|
call void @llvm.aarch64.sve.prfb.gather.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 32, i32 1)
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and relative codegen has been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases,int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2020-03-19 10:35:31 +01:00
|
|
|
define void @llvm_aarch64_sve_prfb_gather_nx4vi32_invalid_immediate_offset_lower_bound(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
|
|
|
|
; CHECK-LABEL: llvm_aarch64_sve_prfb_gather_nx4vi32_invalid_immediate_offset_lower_bound:
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and relative codegen has been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases,int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
; CHECK-NEXT: mov x[[N:[0-9]+]], #-1
|
|
|
|
; CHECK-NEXT: prfb pldl1strm, p0, [x[[N]], z0.s, uxtw]
|
|
|
|
; CHECK-NEXT: ret
|
2020-03-19 10:35:31 +01:00
|
|
|
call void @llvm.aarch64.sve.prfb.gather.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 -1, i32 1)
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and relative codegen has been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases,int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; PRFB <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element, imm = 0, 1, ..., 31
|
2020-03-19 10:35:31 +01:00
|
|
|
define void @llvm_aarch64_sve_prfb_gather_nx2vi64_runtime_offset(<vscale x 2 x i64> %bases, i64 %imm, <vscale x 2 x i1> %Pg) nounwind {
|
|
|
|
; CHECK-LABEL: llvm_aarch64_sve_prfb_gather_nx2vi64_runtime_offset:
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and relative codegen has been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases,int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
; CHECK-NEXT: prfb pldl1strm, p0, [x0, z0.d, uxtw]
|
|
|
|
; CHECK-NEXT: ret
|
2020-03-19 10:35:31 +01:00
|
|
|
call void @llvm.aarch64.sve.prfb.gather.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 %imm, i32 1)
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and relative codegen has been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases,int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2020-03-19 10:35:31 +01:00
|
|
|
define void @llvm_aarch64_sve_prfb_gather_nx2vi64_invalid_immediate_offset_upper_bound(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
|
|
|
|
; CHECK-LABEL: llvm_aarch64_sve_prfb_gather_nx2vi64_invalid_immediate_offset_upper_bound:
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and relative codegen has been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases,int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
; CHECK-NEXT: mov w[[N:[0-9]+]], #32
|
|
|
|
; CHECK-NEXT: prfb pldl1strm, p0, [x[[N]], z0.d, uxtw]
|
|
|
|
; CHECK-NEXT: ret
|
2020-03-19 10:35:31 +01:00
|
|
|
call void @llvm.aarch64.sve.prfb.gather.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 32, i32 1)
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and relative codegen has been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases,int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2020-03-19 10:35:31 +01:00
|
|
|
define void @llvm_aarch64_sve_prfb_gather_nx2vi64_invalid_immediate_offset_lower_bound(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
|
|
|
|
; CHECK-LABEL: llvm_aarch64_sve_prfb_gather_nx2vi64_invalid_immediate_offset_lower_bound:
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and relative codegen has been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases,int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
; CHECK-NEXT: mov x[[N:[0-9]+]], #-1
|
|
|
|
; CHECK-NEXT: prfb pldl1strm, p0, [x[[N]], z0.d, uxtw]
|
|
|
|
; CHECK-NEXT: ret
|
2020-03-19 10:35:31 +01:00
|
|
|
call void @llvm.aarch64.sve.prfb.gather.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 -1, i32 1)
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
|
|
|
; PRFH <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element, imm = 0, 2, ..., 62
|
2020-03-19 10:35:31 +01:00
|
|
|
define void @llvm_aarch64_sve_prfh_gather_nx4vi32_runtime_offset(<vscale x 4 x i32> %bases, i64 %imm, <vscale x 4 x i1> %Pg) nounwind {
|
|
|
|
; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_nx4vi32_runtime_offset:
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
; CHECK-NEXT: prfh pldl1strm, p0, [x0, z0.s, uxtw #1]
|
|
|
|
; CHECK-NEXT: ret
|
2020-03-19 10:35:31 +01:00
|
|
|
call void @llvm.aarch64.sve.prfh.gather.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 %imm, i32 1)
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2020-03-19 10:35:31 +01:00
|
|
|
define void @llvm_aarch64_sve_prfh_gather_nx4vi32_invalid_immediate_offset_upper_bound(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
|
|
|
|
; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_nx4vi32_invalid_immediate_offset_upper_bound:
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
; CHECK-NEXT: mov w[[N:[0-9]+]], #63
|
|
|
|
; CHECK-NEXT: prfh pldl1strm, p0, [x[[N]], z0.s, uxtw #1]
|
|
|
|
; CHECK-NEXT: ret
|
2020-03-19 10:35:31 +01:00
|
|
|
call void @llvm.aarch64.sve.prfh.gather.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 63, i32 1)
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2020-03-19 10:35:31 +01:00
|
|
|
define void @llvm_aarch64_sve_prfh_gather_nx4vi32_invalid_immediate_offset_lower_bound(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
|
|
|
|
; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_nx4vi32_invalid_immediate_offset_lower_bound:
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
; CHECK-NEXT: mov x[[N:[0-9]+]], #-1
|
|
|
|
; CHECK-NEXT: prfh pldl1strm, p0, [x[[N]], z0.s, uxtw #1]
|
|
|
|
; CHECK-NEXT: ret
|
2020-03-19 10:35:31 +01:00
|
|
|
call void @llvm.aarch64.sve.prfh.gather.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 -1, i32 1)
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2020-03-19 10:35:31 +01:00
|
|
|
define void @llvm_aarch64_sve_prfh_gather_nx4vi32_invalid_immediate_offset_inbound_not_multiple_of_2(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
|
|
|
|
; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_nx4vi32_invalid_immediate_offset_inbound_not_multiple_of_2:
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
; CHECK-NEXT: mov w[[N:[0-9]+]], #33
|
|
|
|
; CHECK-NEXT: prfh pldl1strm, p0, [x[[N]], z0.s, uxtw #1]
|
|
|
|
; CHECK-NEXT: ret
|
2020-03-19 10:35:31 +01:00
|
|
|
call void @llvm.aarch64.sve.prfh.gather.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 33, i32 1)
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; PRFH <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element, imm = 0, 2, ..., 62
|
2020-03-19 10:35:31 +01:00
|
|
|
define void @llvm_aarch64_sve_prfh_gather_nx2vi64_runtime_offset(<vscale x 2 x i64> %bases, i64 %imm, <vscale x 2 x i1> %Pg) nounwind {
|
|
|
|
; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_nx2vi64_runtime_offset:
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
; CHECK-NEXT: prfh pldl1strm, p0, [x0, z0.d, uxtw #1]
|
|
|
|
; CHECK-NEXT: ret
|
2020-03-19 10:35:31 +01:00
|
|
|
call void @llvm.aarch64.sve.prfh.gather.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 %imm, i32 1)
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2020-03-19 10:35:31 +01:00
|
|
|
define void @llvm_aarch64_sve_prfh_gather_nx2vi64_invalid_immediate_offset_upper_bound(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
|
|
|
|
; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_nx2vi64_invalid_immediate_offset_upper_bound:
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
; CHECK-NEXT: mov w[[N:[0-9]+]], #63
|
|
|
|
; CHECK-NEXT: prfh pldl1strm, p0, [x[[N]], z0.d, uxtw #1]
|
|
|
|
; CHECK-NEXT: ret
|
2020-03-19 10:35:31 +01:00
|
|
|
call void @llvm.aarch64.sve.prfh.gather.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 63, i32 1)
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2020-03-19 10:35:31 +01:00
|
|
|
define void @llvm_aarch64_sve_prfh_gather_nx2vi64_invalid_immediate_offset_lower_bound(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
|
|
|
|
; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_nx2vi64_invalid_immediate_offset_lower_bound:
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
; CHECK-NEXT: mov x[[N:[0-9]+]], #-1
|
|
|
|
; CHECK-NEXT: prfh pldl1strm, p0, [x[[N]], z0.d, uxtw #1]
|
|
|
|
; CHECK-NEXT: ret
|
2020-03-19 10:35:31 +01:00
|
|
|
call void @llvm.aarch64.sve.prfh.gather.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 -1, i32 1)
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2020-03-19 10:35:31 +01:00
|
|
|
define void @llvm_aarch64_sve_prfh_gather_nx2vi64_invalid_immediate_offset_inbound_not_multiple_of_2(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
|
|
|
|
; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_nx2vi64_invalid_immediate_offset_inbound_not_multiple_of_2:
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
; CHECK-NEXT: mov w[[N:[0-9]+]], #33
|
|
|
|
; CHECK-NEXT: prfh pldl1strm, p0, [x[[N]], z0.d, uxtw #1]
|
|
|
|
; CHECK-NEXT: ret
|
2020-03-19 10:35:31 +01:00
|
|
|
call void @llvm.aarch64.sve.prfh.gather.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 33, i32 1)
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
|
|
|
; PRFW <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element, imm = 0, 4, ..., 124
|
2020-03-19 10:35:31 +01:00
|
|
|
define void @llvm_aarch64_sve_prfw_gather_nx4vi32_runtime_offset(<vscale x 4 x i32> %bases, i64 %imm, <vscale x 4 x i1> %Pg) nounwind {
|
|
|
|
; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_nx4vi32_runtime_offset:
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
; CHECK-NEXT: prfw pldl1strm, p0, [x0, z0.s, uxtw #2]
|
|
|
|
; CHECK-NEXT: ret
|
2020-03-19 10:35:31 +01:00
|
|
|
call void @llvm.aarch64.sve.prfw.gather.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 %imm, i32 1)
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2020-03-19 10:35:31 +01:00
|
|
|
define void @llvm_aarch64_sve_prfw_gather_nx4vi32_invalid_immediate_offset_upper_bound(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
|
|
|
|
; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_nx4vi32_invalid_immediate_offset_upper_bound:
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
; CHECK-NEXT: mov w[[N:[0-9]+]], #125
|
|
|
|
; CHECK-NEXT: prfw pldl1strm, p0, [x[[N]], z0.s, uxtw #2]
|
|
|
|
; CHECK-NEXT: ret
|
2020-03-19 10:35:31 +01:00
|
|
|
call void @llvm.aarch64.sve.prfw.gather.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 125, i32 1)
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2020-03-19 10:35:31 +01:00
|
|
|
define void @llvm_aarch64_sve_prfw_gather_nx4vi32_invalid_immediate_offset_lower_bound(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
|
|
|
|
; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_nx4vi32_invalid_immediate_offset_lower_bound:
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
; CHECK-NEXT: mov x[[N:[0-9]+]], #-1
|
|
|
|
; CHECK-NEXT: prfw pldl1strm, p0, [x[[N]], z0.s, uxtw #2]
|
|
|
|
; CHECK-NEXT: ret
|
2020-03-19 10:35:31 +01:00
|
|
|
call void @llvm.aarch64.sve.prfw.gather.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 -1, i32 1)
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2020-03-19 10:35:31 +01:00
|
|
|
define void @llvm_aarch64_sve_prfw_gather_nx4vi32_invalid_immediate_offset_inbound_not_multiple_of_4(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
|
|
|
|
; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_nx4vi32_invalid_immediate_offset_inbound_not_multiple_of_4:
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
; CHECK-NEXT: mov w[[N:[0-9]+]], #33
|
|
|
|
; CHECK-NEXT: prfw pldl1strm, p0, [x[[N]], z0.s, uxtw #2]
|
|
|
|
; CHECK-NEXT: ret
|
2020-03-19 10:35:31 +01:00
|
|
|
call void @llvm.aarch64.sve.prfw.gather.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 33, i32 1)
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; PRFW <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element, imm = 0, 4, ..., 124
|
2020-03-19 10:35:31 +01:00
|
|
|
define void @llvm_aarch64_sve_prfw_gather_nx2vi64_runtime_offset(<vscale x 2 x i64> %bases, i64 %imm, <vscale x 2 x i1> %Pg) nounwind {
|
|
|
|
; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_nx2vi64_runtime_offset:
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
; CHECK-NEXT: prfw pldl1strm, p0, [x0, z0.d, uxtw #2]
|
|
|
|
; CHECK-NEXT: ret
|
2020-03-19 10:35:31 +01:00
|
|
|
call void @llvm.aarch64.sve.prfw.gather.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 %imm, i32 1)
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2020-03-19 10:35:31 +01:00
|
|
|
define void @llvm_aarch64_sve_prfw_gather_nx2vi64_invalid_immediate_offset_upper_bound(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
|
|
|
|
; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_nx2vi64_invalid_immediate_offset_upper_bound:
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
; CHECK-NEXT: mov w[[N:[0-9]+]], #125
|
|
|
|
; CHECK-NEXT: prfw pldl1strm, p0, [x[[N]], z0.d, uxtw #2]
|
|
|
|
; CHECK-NEXT: ret
|
2020-03-19 10:35:31 +01:00
|
|
|
call void @llvm.aarch64.sve.prfw.gather.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 125, i32 1)
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2020-03-19 10:35:31 +01:00
|
|
|
define void @llvm_aarch64_sve_prfw_gather_nx2vi64_invalid_immediate_offset_lower_bound(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
|
|
|
|
; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_nx2vi64_invalid_immediate_offset_lower_bound:
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
; CHECK-NEXT: mov x[[N:[0-9]+]], #-1
|
|
|
|
; CHECK-NEXT: prfw pldl1strm, p0, [x[[N]], z0.d, uxtw #2]
|
|
|
|
; CHECK-NEXT: ret
|
2020-03-19 10:35:31 +01:00
|
|
|
call void @llvm.aarch64.sve.prfw.gather.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 -1, i32 1)
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2020-03-19 10:35:31 +01:00
|
|
|
define void @llvm_aarch64_sve_prfw_gather_nx2vi64_invalid_immediate_offset_inbound_not_multiple_of_4(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
|
|
|
|
; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_nx2vi64_invalid_immediate_offset_inbound_not_multiple_of_4:
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
; CHECK-NEXT: mov w[[N:[0-9]+]], #33
|
|
|
|
; CHECK-NEXT: prfw pldl1strm, p0, [x[[N]], z0.d, uxtw #2]
|
|
|
|
; CHECK-NEXT: ret
|
2020-03-19 10:35:31 +01:00
|
|
|
call void @llvm.aarch64.sve.prfw.gather.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 33, i32 1)
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
|
|
|
; PRFD <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element, imm = 0, 8, ..., 248
|
2020-03-19 10:35:31 +01:00
|
|
|
define void @llvm_aarch64_sve_prfd_gather_nx4vi32_runtime_offset(<vscale x 4 x i32> %bases, i64 %imm, <vscale x 4 x i1> %Pg) nounwind {
|
|
|
|
; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_nx4vi32_runtime_offset:
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
; CHECK-NEXT: prfd pldl1strm, p0, [x0, z0.s, uxtw #3]
|
|
|
|
; CHECK-NEXT: ret
|
2020-03-19 10:35:31 +01:00
|
|
|
call void @llvm.aarch64.sve.prfd.gather.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 %imm, i32 1)
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2020-03-19 10:35:31 +01:00
|
|
|
define void @llvm_aarch64_sve_prfd_gather_nx4vi32_invalid_immediate_offset_upper_bound(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
|
|
|
|
; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_nx4vi32_invalid_immediate_offset_upper_bound:
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
; CHECK-NEXT: mov w[[N:[0-9]+]], #125
|
|
|
|
; CHECK-NEXT: prfd pldl1strm, p0, [x[[N]], z0.s, uxtw #3]
|
|
|
|
; CHECK-NEXT: ret
|
2020-03-19 10:35:31 +01:00
|
|
|
call void @llvm.aarch64.sve.prfd.gather.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 125, i32 1)
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2020-03-19 10:35:31 +01:00
|
|
|
define void @llvm_aarch64_sve_prfd_gather_nx4vi32_invalid_immediate_offset_lower_bound(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
|
|
|
|
; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_nx4vi32_invalid_immediate_offset_lower_bound:
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and relative codegen has been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases,int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
; CHECK-NEXT: mov x[[N:[0-9]+]], #-1
|
|
|
|
; CHECK-NEXT: prfd pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw #3]
|
|
|
|
; CHECK-NEXT: ret
|
2020-03-19 10:35:31 +01:00
|
|
|
call void @llvm.aarch64.sve.prfd.gather.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 -1, i32 1)
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and relative codegen has been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases,int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2020-03-19 10:35:31 +01:00
|
|
|
define void @llvm_aarch64_sve_prfd_gather_nx4vi32_invalid_immediate_offset_inbound_not_multiple_of_8(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
|
|
|
|
; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_nx4vi32_invalid_immediate_offset_inbound_not_multiple_of_8:
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and relative codegen has been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases,int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
; CHECK-NEXT: mov w[[N:[0-9]+]], #33
|
|
|
|
; CHECK-NEXT: prfd pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw #3]
|
|
|
|
; CHECK-NEXT: ret
|
2020-03-19 10:35:31 +01:00
|
|
|
call void @llvm.aarch64.sve.prfd.gather.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 33, i32 1)
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and relative codegen has been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases,int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; PRFD <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element, imm = 0, 8, ..., 248
|
2020-03-19 10:35:31 +01:00
|
|
|
define void @llvm_aarch64_sve_prfd_gather_nx2vi64_runtime_offset(<vscale x 2 x i64> %bases, i64 %imm, <vscale x 2 x i1> %Pg) nounwind {
|
|
|
|
; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_nx2vi64_runtime_offset:
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and relative codegen has been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases,int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
; CHECK-NEXT: prfd pldl1strm, p0, [x0, z0.d, uxtw #3]
|
|
|
|
; CHECK-NEXT: ret
|
2020-03-19 10:35:31 +01:00
|
|
|
call void @llvm.aarch64.sve.prfd.gather.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 %imm, i32 1)
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and relative codegen has been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases,int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2020-03-19 10:35:31 +01:00
|
|
|
define void @llvm_aarch64_sve_prfd_gather_nx2vi64_invalid_immediate_offset_upper_bound(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
|
|
|
|
; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_nx2vi64_invalid_immediate_offset_upper_bound:
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and relative codegen has been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases,int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
; CHECK-NEXT: mov w[[N:[0-9]+]], #125
|
|
|
|
; CHECK-NEXT: prfd pldl1strm, p0, [x[[N]], z0.d, uxtw #3]
|
|
|
|
; CHECK-NEXT: ret
|
2020-03-19 10:35:31 +01:00
|
|
|
call void @llvm.aarch64.sve.prfd.gather.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 125, i32 1)
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and relative codegen has been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases,int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2020-03-19 10:35:31 +01:00
|
|
|
define void @llvm_aarch64_sve_prfd_gather_nx2vi64_invalid_immediate_offset_lower_bound(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
|
|
|
|
; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_nx2vi64_invalid_immediate_offset_lower_bound:
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and relative codegen has been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases,int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
; CHECK-NEXT: mov x[[N:[0-9]+]], #-1
|
|
|
|
; CHECK-NEXT: prfd pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw #3]
|
|
|
|
; CHECK-NEXT: ret
|
2020-03-19 10:35:31 +01:00
|
|
|
call void @llvm.aarch64.sve.prfd.gather.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 -1, i32 1)
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and relative codegen has been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases,int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2020-03-19 10:35:31 +01:00
|
|
|
define void @llvm_aarch64_sve_prfd_gather_nx2vi64_invalid_immediate_offset_inbound_not_multiple_of_8(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
|
|
|
|
; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_nx2vi64_invalid_immediate_offset_inbound_not_multiple_of_8:
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
; CHECK-NEXT: mov w[[N:[0-9]+]], #33
|
|
|
|
; CHECK-NEXT: prfd pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw #3]
|
|
|
|
; CHECK-NEXT: ret
|
2020-03-19 10:35:31 +01:00
|
|
|
call void @llvm.aarch64.sve.prfd.gather.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 33, i32 1)
|
Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and related codegen have been implemented for the following
SVE instructions:
1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] -> 64-bit scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
The instructions are associated with the following intrinsics, respectively:
1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
i8* %base,
<vscale x 4 x i32> %offset,
<vscale x 4 x i1> %Pg,
i32 %prfop)
2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
i8* %base,
<vscale x 2 x i32> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
i8* %base,
<vscale x 2 x i64> %offset,
<vscale x 2 x i1> %Pg,
i32 %prfop)
4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
<vscale x 4 x i32> %bases,
i64 %imm,
<vscale x 4 x i1> %Pg,
i32 %prfop)
5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
<vscale x 2 x i64> %bases,
i64 %imm,
<vscale x 2 x i1> %Pg,
i32 %prfop)
The intrinsics are the IR counterpart of the following SVE ACLE functions:
* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
Reviewers: andwar, sdesmalen, efriedma, rengolin
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75580
2020-02-26 23:19:42 +01:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2020-03-19 10:35:31 +01:00
|
|
|
declare void @llvm.aarch64.sve.prfb.gather.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 %imm, i32 %prfop)
|
|
|
|
declare void @llvm.aarch64.sve.prfb.gather.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 %imm, i32 %prfop)
|
|
|
|
declare void @llvm.aarch64.sve.prfh.gather.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 %imm, i32 %prfop)
|
|
|
|
declare void @llvm.aarch64.sve.prfh.gather.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 %imm, i32 %prfop)
|
|
|
|
declare void @llvm.aarch64.sve.prfw.gather.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 %imm, i32 %prfop)
|
|
|
|
declare void @llvm.aarch64.sve.prfw.gather.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 %imm, i32 %prfop)
|
|
|
|
declare void @llvm.aarch64.sve.prfd.gather.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 %imm, i32 %prfop)
|
|
|
|
declare void @llvm.aarch64.sve.prfd.gather.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 %imm, i32 %prfop)
|