1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[AArch64][SVE] Add ptest intrinsics

Summary:
Implements the following intrinsics:

    * @llvm.aarch64.sve.ptest.any
    * @llvm.aarch64.sve.ptest.first
    * @llvm.aarch64.sve.ptest.last

Reviewers: sdesmalen, efriedma, dancgr, mgudim, cameron.mcinally, rengolin

Reviewed By: efriedma

Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D72398
This commit is contained in:
Cullen Rhodes 2020-01-08 14:25:20 +00:00
parent 260af8e320
commit 15430bed65
7 changed files with 130 additions and 1 deletions

View File

@ -1014,6 +1014,12 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
llvm_i32_ty],
[IntrNoMem]>;
// Signature shared by the SVE ptest intrinsics: the result is a single i1 and
// both operands have the same overloaded vector type (the second operand is
// tied to the first through LLVMMatchType<0>). Declared IntrNoMem.
class AdvSIMD_SVE_PTEST_Intrinsic
: Intrinsic<[llvm_i1_ty],
[llvm_anyvector_ty,
LLVMMatchType<0>],
[IntrNoMem]>;
class AdvSIMD_SVE_TBL_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
@ -1552,6 +1558,14 @@ def int_aarch64_sve_pnext : AdvSIMD_Pred1VectorArg_Intrinsic;
def int_aarch64_sve_punpkhi : AdvSIMD_SVE_PUNPKHI_Intrinsic;
def int_aarch64_sve_punpklo : AdvSIMD_SVE_PUNPKHI_Intrinsic;
//
// Testing predicates
//
// These correspond to @llvm.aarch64.sve.ptest.{any,first,last}; all three use
// the AdvSIMD_SVE_PTEST_Intrinsic signature (i1 result, two vector operands of
// matching type).
def int_aarch64_sve_ptest_any : AdvSIMD_SVE_PTEST_Intrinsic;
def int_aarch64_sve_ptest_first : AdvSIMD_SVE_PTEST_Intrinsic;
def int_aarch64_sve_ptest_last : AdvSIMD_SVE_PTEST_Intrinsic;
//
// Gather loads:
//

View File

@ -1357,6 +1357,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::UUNPKHI: return "AArch64ISD::UUNPKHI";
case AArch64ISD::UUNPKLO: return "AArch64ISD::UUNPKLO";
case AArch64ISD::INSR: return "AArch64ISD::INSR";
case AArch64ISD::PTEST: return "AArch64ISD::PTEST";
case AArch64ISD::PTRUE: return "AArch64ISD::PTRUE";
case AArch64ISD::GLD1: return "AArch64ISD::GLD1";
case AArch64ISD::GLD1_SCALED: return "AArch64ISD::GLD1_SCALED";
@ -10889,6 +10890,30 @@ static SDValue tryConvertSVEWideCompare(SDNode *N, unsigned ReplacementIID,
return SDValue();
}
/// Build the DAG nodes that test predicate \p Op under governing predicate
/// \p Pg and turn the outcome for \p Cond into an integer of type \p VT.
///
/// \param DAG  DAG in which the new nodes are created.
/// \param VT   Type the caller wants back; the result is zero-extended or
///             truncated to it at the end.
/// \param Pg   First operand handed to the AArch64ISD::PTEST node.
/// \param Op   Second operand handed to the AArch64ISD::PTEST node; must be a
///             legal scalable vector type (asserted).
/// \param Cond Condition to materialise from the PTEST result.
/// \returns A value of type \p VT built from a CSEL between 0 and 1.
static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op,
                        AArch64CC::CondCode Cond) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  assert(Op.getValueType().isScalableVector() &&
         TLI.isTypeLegal(Op.getValueType()) &&
         "Expected legal scalable vector type!");

  SDLoc DL(Op);

  // The target-specific CSEL node has to operate on a legal type, so the
  // requested type is legalised first and the final result converted back.
  EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
  SDValue One = DAG.getConstant(1, DL, LegalVT);
  SDValue Zero = DAG.getConstant(0, DL, LegalVT);

  // PTEST is what sets the condition flags consumed by the CSEL below.
  SDValue Flags = DAG.getNode(AArch64ISD::PTEST, DL, MVT::Other, Pg, Op);

  // The condition is deliberately inverted and the (0, 1) operands ordered to
  // match; this form encourages the CSEL to be removed when it feeds a compare.
  SDValue InvCC = DAG.getConstant(getInvertedCondCode(Cond), DL, MVT::i32);
  SDValue Select =
      DAG.getNode(AArch64ISD::CSEL, DL, LegalVT, Zero, One, InvCC, Flags);

  return DAG.getZExtOrTrunc(Select, DL, VT);
}
static SDValue performIntrinsicCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
@ -10989,6 +11014,15 @@ static SDValue performIntrinsicCombine(SDNode *N,
case Intrinsic::aarch64_sve_cmpls_wide:
return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmphs, true,
DCI, DAG);
case Intrinsic::aarch64_sve_ptest_any:
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
AArch64CC::ANY_ACTIVE);
case Intrinsic::aarch64_sve_ptest_first:
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
AArch64CC::FIRST_ACTIVE);
case Intrinsic::aarch64_sve_ptest_last:
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
AArch64CC::LAST_ACTIVE);
}
return SDValue();
}

View File

@ -212,6 +212,7 @@ enum NodeType : unsigned {
TBL,
INSR,
PTEST,
PTRUE,
// Unsigned gather loads.

View File

@ -73,6 +73,9 @@ def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithIn
def SDT_AArch64Rev : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def AArch64rev : SDNode<"AArch64ISD::REV", SDT_AArch64Rev>;
// Type profile for AArch64ISD::PTEST: no value results and two operands, the
// first constrained to be a vector and the second to have the same type.
def SDT_AArch64PTest : SDTypeProfile<0, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def AArch64ptest : SDNode<"AArch64ISD::PTEST", SDT_AArch64PTest>;
let Predicates = [HasSVE] in {
def RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr">;
@ -1086,6 +1089,15 @@ let Predicates = [HasSVE] in {
def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn",
(FCMGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>;
// Select PTEST_PP for AArch64ptest on each predicate type (nxv16i1, nxv8i1,
// nxv4i1, nxv2i1). The same instruction is used for every type; $pg and $src
// are forwarded unchanged.
def : Pat<(AArch64ptest (nxv16i1 PPR:$pg), (nxv16i1 PPR:$src)),
(PTEST_PP PPR:$pg, PPR:$src)>;
def : Pat<(AArch64ptest (nxv8i1 PPR:$pg), (nxv8i1 PPR:$src)),
(PTEST_PP PPR:$pg, PPR:$src)>;
def : Pat<(AArch64ptest (nxv4i1 PPR:$pg), (nxv4i1 PPR:$src)),
(PTEST_PP PPR:$pg, PPR:$src)>;
def : Pat<(AArch64ptest (nxv2i1 PPR:$pg), (nxv2i1 PPR:$src)),
(PTEST_PP PPR:$pg, PPR:$src)>;
def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (SXTW_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (SXTH_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i8), (SXTB_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;

View File

@ -250,7 +250,13 @@ enum CondCode { // Meaning (integer) Meaning (floating-point)
AL = 0xe, // Always (unconditional) Always (unconditional)
NV = 0xf, // Always (unconditional) Always (unconditional)
// Note the NV exists purely to disassemble 0b1111. Execution is "always".
Invalid
Invalid,
// Common aliases used for SVE.
ANY_ACTIVE = NE, // (!Z)
FIRST_ACTIVE = MI, // ( N)
LAST_ACTIVE = LO, // (!C)
NONE_ACTIVE = EQ // ( Z)
};
inline static const char *getCondCodeName(CondCode Code) {

View File

@ -0,0 +1,36 @@
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
;
; PTEST
;
; ptest.any on nxv16i1 operands: expect a PTEST followed by a CSET on "ne".
define i1 @ptest_any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
; CHECK-LABEL: ptest_any:
; CHECK: ptest p0, p1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%out = call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a)
ret i1 %out
}
; ptest.first on nxv16i1 operands: expect a PTEST followed by a CSET on "mi".
define i1 @ptest_first(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
; CHECK-LABEL: ptest_first:
; CHECK: ptest p0, p1.b
; CHECK-NEXT: cset w0, mi
; CHECK-NEXT: ret
%out = call i1 @llvm.aarch64.sve.ptest.first(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a)
ret i1 %out
}
; ptest.last on nxv16i1 operands: expect a PTEST followed by a CSET on "lo".
define i1 @ptest_last(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
; CHECK-LABEL: ptest_last:
; CHECK: ptest p0, p1.b
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%out = call i1 @llvm.aarch64.sve.ptest.last(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a)
ret i1 %out
}
declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a)
declare i1 @llvm.aarch64.sve.ptest.first(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a)
declare i1 @llvm.aarch64.sve.ptest.last(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a)

View File

@ -0,0 +1,26 @@
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
; Ensure we use the inverted CC result of SVE compare instructions when branching.
; A wide compare feeds ptest.any, whose i1 result is branched on directly. The
; expected output goes straight from PTEST to b.ne with no instruction between.
define void @sve_cmplt_setcc_inverted(<vscale x 8 x i16>* %out, <vscale x 8 x i16> %in, <vscale x 8 x i1> %pg) {
; CHECK-LABEL: @sve_cmplt_setcc_inverted
; CHECK: cmplt p1.h, p0/z, z0.h, #0
; CHECK-NEXT: ptest p0, p1.b
; CHECK-NEXT: b.ne
entry:
%0 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmplt.wide.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %in, <vscale x 2 x i64> zeroinitializer)
%1 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv8i1(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %0)
; The masked store runs only on the false edge, i.e. when ptest.any yields 0.
br i1 %1, label %if.end, label %if.then
if.then:
tail call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> %in, <vscale x 8 x i16>* %out, i32 2, <vscale x 8 x i1> %pg)
br label %if.end
if.end:
ret void
}
declare i1 @llvm.aarch64.sve.ptest.any.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.cmplt.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
declare void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>*, i32, <vscale x 8 x i1>)