mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
259a327aae
This is split off from D79100 and: - adds an intrinsic description/definition for @llvm.get.active.lane.mask(), and - describes its semantics in LangRef. As described (in more detail) in its LangRef section, it is semantically equivalent to an icmp with the vector induction variable and the back-edge taken count, and generates a mask of active/inactive vector lanes. It will have several use cases. First, it will be used by the ExpandVectorPredication pass for the VP intrinsics, to expand VP intrinsics for scalable vectors on targets that do not support the `%evl` parameter, see D78203. Also, this is part of, and essential for, our ARM MVE tail-predication story: - this intrinsic will be emitted by the LoopVectorizer in D79100, when the scalar epilogue is tail-folded into the vector body. This new intrinsic will generate the predicate for the masked loads/stores, and it takes the back-edge taken count as an argument. The back-edge taken count represents the number of elements processed by the loop, which we need to set up MVE tail-predication. - Emitting the intrinsic is controlled by a new TTI hook, see D80597. - We pick up this new intrinsic in an ARM MVETailPredication backend pass, see D79175, and convert it to an MVE target-specific intrinsic/instruction to create a tail-predicated loop. Differential Revision: https://reviews.llvm.org/D80596
22 lines
722 B
LLVM
22 lines
722 B
LLVM
; Negative test for @llvm.get.active.lane.mask: every declaration in this file
; uses an invalid return type, so assembling the file is expected to fail
; (hence `not llvm-as`). The diagnostics are redirected from stderr to stdout
; and matched by FileCheck against the directives inside each function.
; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s

; Case 1: the intrinsic is declared with a vector return type whose elements
; are i32 rather than i1.
declare <4 x i32> @llvm.get.active.lane.mask.v4i32.i32(i32, i32)

; Calls the <4 x i32> variant. The expected diagnostic names the element-type
; problem and is followed on the next line by the offending call.
define <4 x i32> @t1(i32 %IV, i32 %BTC) {
; CHECK:      get_active_lane_mask: element type is not i1
; CHECK-NEXT: %res = call <4 x i32> @llvm.get.active.lane.mask.v4i32.i32(i32 %IV, i32 %BTC)

  %res = call <4 x i32> @llvm.get.active.lane.mask.v4i32.i32(i32 %IV, i32 %BTC)
  ret <4 x i32> %res
}

; Case 2: the intrinsic is declared with a scalar i32 return type instead of a
; vector.
declare i32 @llvm.get.active.lane.mask.i32.i32(i32, i32)

; Calls the scalar-returning variant. The expected diagnostic reports an
; incorrect intrinsic return type and is followed on the next line by the
; offending declaration.
define i32 @t2(i32 %IV, i32 %BTC) {
; CHECK:      Intrinsic has incorrect return type!
; CHECK-NEXT: i32 (i32, i32)* @llvm.get.active.lane.mask.i32.i32

  %res = call i32 @llvm.get.active.lane.mask.i32.i32(i32 %IV, i32 %BTC)
  ret i32 %res
}