mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
[AArch64][SVE] Add ISel patterns for floating point compare with zero instructions
Additionally, lower the floating-point compare SVE intrinsics to SETCC_MERGE_ZERO ISD nodes to avoid duplicating ISel patterns.

Differential Revision: https://reviews.llvm.org/D105486
This commit is contained in:
parent
c4087cbb99
commit
7f15962ed8
@ -14401,30 +14401,35 @@ static SDValue performIntrinsicCombine(SDNode *N,
|
||||
N->getValueType(0), N->getOperand(1), N->getOperand(2),
|
||||
N->getOperand(3), DAG.getCondCode(ISD::SETUGT));
|
||||
break;
|
||||
case Intrinsic::aarch64_sve_fcmpge:
|
||||
case Intrinsic::aarch64_sve_cmpge:
|
||||
if (!N->getOperand(2).getValueType().isFloatingPoint())
|
||||
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
|
||||
N->getValueType(0), N->getOperand(1), N->getOperand(2),
|
||||
N->getOperand(3), DAG.getCondCode(ISD::SETGE));
|
||||
break;
|
||||
case Intrinsic::aarch64_sve_fcmpgt:
|
||||
case Intrinsic::aarch64_sve_cmpgt:
|
||||
if (!N->getOperand(2).getValueType().isFloatingPoint())
|
||||
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
|
||||
N->getValueType(0), N->getOperand(1), N->getOperand(2),
|
||||
N->getOperand(3), DAG.getCondCode(ISD::SETGT));
|
||||
break;
|
||||
case Intrinsic::aarch64_sve_fcmpeq:
|
||||
case Intrinsic::aarch64_sve_cmpeq:
|
||||
if (!N->getOperand(2).getValueType().isFloatingPoint())
|
||||
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
|
||||
N->getValueType(0), N->getOperand(1), N->getOperand(2),
|
||||
N->getOperand(3), DAG.getCondCode(ISD::SETEQ));
|
||||
break;
|
||||
case Intrinsic::aarch64_sve_fcmpne:
|
||||
case Intrinsic::aarch64_sve_cmpne:
|
||||
if (!N->getOperand(2).getValueType().isFloatingPoint())
|
||||
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
|
||||
N->getValueType(0), N->getOperand(1), N->getOperand(2),
|
||||
N->getOperand(3), DAG.getCondCode(ISD::SETNE));
|
||||
break;
|
||||
case Intrinsic::aarch64_sve_fcmpuo:
|
||||
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
|
||||
N->getValueType(0), N->getOperand(1), N->getOperand(2),
|
||||
N->getOperand(3), DAG.getCondCode(ISD::SETUO));
|
||||
break;
|
||||
case Intrinsic::aarch64_sve_fadda:
|
||||
return combineSVEReductionOrderedFP(N, AArch64ISD::FADDA_PRED, DAG);
|
||||
case Intrinsic::aarch64_sve_faddv:
|
||||
|
@ -1255,20 +1255,20 @@ let Predicates = [HasSVE] in {
|
||||
defm CMPLO_PPzZI : sve_int_ucmp_vi<0b10, "cmplo", SETULT, SETUGT>;
|
||||
defm CMPLS_PPzZI : sve_int_ucmp_vi<0b11, "cmpls", SETULE, SETUGE>;
|
||||
|
||||
defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", int_aarch64_sve_fcmpge, SETOGE, SETGE, SETOLE, SETLE>;
|
||||
defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", int_aarch64_sve_fcmpgt, SETOGT, SETGT, SETOLT, SETLT>;
|
||||
defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", int_aarch64_sve_fcmpeq, SETOEQ, SETEQ, SETOEQ, SETEQ>;
|
||||
defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", int_aarch64_sve_fcmpne, SETONE, SETNE, SETONE, SETNE>;
|
||||
defm FCMUO_PPzZZ : sve_fp_3op_p_pd_cc<0b100, "fcmuo", int_aarch64_sve_fcmpuo, SETUO, SETUO, SETUO, SETUO>;
|
||||
defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", SETOGE, SETGE, SETOLE, SETLE>;
|
||||
defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", SETOGT, SETGT, SETOLT, SETLT>;
|
||||
defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", SETOEQ, SETEQ, SETOEQ, SETEQ>;
|
||||
defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", SETONE, SETNE, SETONE, SETNE>;
|
||||
defm FCMUO_PPzZZ : sve_fp_3op_p_pd_cc<0b100, "fcmuo", SETUO, SETUO, SETUO, SETUO>;
|
||||
defm FACGE_PPzZZ : sve_fp_3op_p_pd<0b101, "facge", int_aarch64_sve_facge>;
|
||||
defm FACGT_PPzZZ : sve_fp_3op_p_pd<0b111, "facgt", int_aarch64_sve_facgt>;
|
||||
|
||||
defm FCMGE_PPzZ0 : sve_fp_2op_p_pd<0b000, "fcmge">;
|
||||
defm FCMGT_PPzZ0 : sve_fp_2op_p_pd<0b001, "fcmgt">;
|
||||
defm FCMLT_PPzZ0 : sve_fp_2op_p_pd<0b010, "fcmlt">;
|
||||
defm FCMLE_PPzZ0 : sve_fp_2op_p_pd<0b011, "fcmle">;
|
||||
defm FCMEQ_PPzZ0 : sve_fp_2op_p_pd<0b100, "fcmeq">;
|
||||
defm FCMNE_PPzZ0 : sve_fp_2op_p_pd<0b110, "fcmne">;
|
||||
defm FCMGE_PPzZ0 : sve_fp_2op_p_pd<0b000, "fcmge", SETOGE, SETGE, SETOLE, SETLE>;
|
||||
defm FCMGT_PPzZ0 : sve_fp_2op_p_pd<0b001, "fcmgt", SETOGT, SETGT, SETOLT, SETLT>;
|
||||
defm FCMLT_PPzZ0 : sve_fp_2op_p_pd<0b010, "fcmlt", SETOLT, SETLT, SETOGT, SETGT>;
|
||||
defm FCMLE_PPzZ0 : sve_fp_2op_p_pd<0b011, "fcmle", SETOLE, SETLE, SETOGE, SETGE>;
|
||||
defm FCMEQ_PPzZ0 : sve_fp_2op_p_pd<0b100, "fcmeq", SETOEQ, SETEQ, SETOEQ, SETEQ>;
|
||||
defm FCMNE_PPzZ0 : sve_fp_2op_p_pd<0b110, "fcmne", SETONE, SETNE, SETONE, SETNE>;
|
||||
|
||||
defm WHILELT_PWW : sve_int_while4_rr<0b010, "whilelt", int_aarch64_sve_whilelt>;
|
||||
defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele", int_aarch64_sve_whilele>;
|
||||
|
@ -4394,6 +4394,14 @@ multiclass SVE_SETCC_Pat<CondCode cc, CondCode invcc, ValueType predvt,
|
||||
(cmp $Op1, $Op3, $Op2)>;
|
||||
}
|
||||
|
||||
// Selection patterns that map an AArch64setcc_z node with SVEDup0 as one of
// its compared operands onto a two-operand compare instruction.
//   cc     - condition code matched when SVEDup0 is the second compared operand.
//   invcc  - condition code matched when SVEDup0 is the first compared operand.
//   predvt - type of the result and of the first operand ($Op1).
//   intvt  - type of the remaining vector operand ($Op2).
//            NOTE(review): despite the name, the sve_fp_2op_p_pd
//            instantiations in this change pass floating-point types.
//   cmp    - instruction emitted as (cmp $Op1, $Op2).
// NOTE(review): SVEDup0 is presumably the all-zero splat matcher; confirm
// against its definition, which is not visible here.
multiclass SVE_SETCC_Pat_With_Zero<CondCode cc, CondCode invcc, ValueType predvt,
|
||||
ValueType intvt, Instruction cmp> {
|
||||
// SVEDup0 on the right-hand side: match with cc.
def : Pat<(predvt (AArch64setcc_z predvt:$Op1, intvt:$Op2, (SVEDup0), cc)),
|
||||
(cmp $Op1, $Op2)>;
|
||||
// SVEDup0 on the left-hand side: match with invcc and emit the same instruction.
def : Pat<(predvt (AArch64setcc_z predvt:$Op1, (SVEDup0), intvt:$Op2, invcc)),
|
||||
(cmp $Op1, $Op2)>;
|
||||
}
|
||||
|
||||
multiclass sve_int_cmp_0<bits<3> opc, string asm, CondCode cc, CondCode invcc> {
|
||||
def _B : sve_int_cmp<0b0, 0b00, opc, asm, PPR8, ZPR8, ZPR8>;
|
||||
def _H : sve_int_cmp<0b0, 0b01, opc, asm, PPR16, ZPR16, ZPR16>;
|
||||
@ -4754,10 +4762,13 @@ multiclass sve_fp_3op_p_pd<bits<3> opc, string asm, SDPatternOperator op> {
|
||||
def : SVE_3_Op_Pat<nxv2i1, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
|
||||
}
|
||||
|
||||
multiclass sve_fp_3op_p_pd_cc<bits<3> opc, string asm, SDPatternOperator op,
|
||||
multiclass sve_fp_3op_p_pd_cc<bits<3> opc, string asm,
|
||||
CondCode cc1, CondCode cc2,
|
||||
CondCode invcc1, CondCode invcc2>
|
||||
: sve_fp_3op_p_pd<opc, asm, op> {
|
||||
CondCode invcc1, CondCode invcc2> {
|
||||
def _H : sve_fp_3op_p_pd<0b01, opc, asm, PPR16, ZPR16>;
|
||||
def _S : sve_fp_3op_p_pd<0b10, opc, asm, PPR32, ZPR32>;
|
||||
def _D : sve_fp_3op_p_pd<0b11, opc, asm, PPR64, ZPR64>;
|
||||
|
||||
defm : SVE_SETCC_Pat<cc1, invcc1, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
|
||||
defm : SVE_SETCC_Pat<cc1, invcc1, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
|
||||
defm : SVE_SETCC_Pat<cc1, invcc1, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
|
||||
@ -4797,10 +4808,26 @@ class sve_fp_2op_p_pd<bits<2> sz, bits<3> opc, string asm, PPRRegOp pprty,
|
||||
let Inst{3-0} = Pd;
|
||||
}
|
||||
|
||||
multiclass sve_fp_2op_p_pd<bits<3> opc, string asm> {
|
||||
multiclass sve_fp_2op_p_pd<bits<3> opc, string asm,
|
||||
CondCode cc1, CondCode cc2,
|
||||
CondCode invcc1, CondCode invcc2> {
|
||||
def _H : sve_fp_2op_p_pd<0b01, opc, asm, PPR16, ZPR16>;
|
||||
def _S : sve_fp_2op_p_pd<0b10, opc, asm, PPR32, ZPR32>;
|
||||
def _D : sve_fp_2op_p_pd<0b11, opc, asm, PPR64, ZPR64>;
|
||||
|
||||
defm : SVE_SETCC_Pat_With_Zero<cc1, invcc1, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
|
||||
defm : SVE_SETCC_Pat_With_Zero<cc1, invcc1, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
|
||||
defm : SVE_SETCC_Pat_With_Zero<cc1, invcc1, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
|
||||
defm : SVE_SETCC_Pat_With_Zero<cc1, invcc1, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
|
||||
defm : SVE_SETCC_Pat_With_Zero<cc1, invcc1, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
|
||||
defm : SVE_SETCC_Pat_With_Zero<cc1, invcc1, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
|
||||
|
||||
defm : SVE_SETCC_Pat_With_Zero<cc2, invcc2, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
|
||||
defm : SVE_SETCC_Pat_With_Zero<cc2, invcc2, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
|
||||
defm : SVE_SETCC_Pat_With_Zero<cc2, invcc2, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
|
||||
defm : SVE_SETCC_Pat_With_Zero<cc2, invcc2, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
|
||||
defm : SVE_SETCC_Pat_With_Zero<cc2, invcc2, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
|
||||
defm : SVE_SETCC_Pat_With_Zero<cc2, invcc2, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
|
||||
}
|
||||
|
||||
|
||||
|
@ -308,3 +308,117 @@ define <vscale x 4 x i1> @ne_fast(<vscale x 4 x float> %x, <vscale x 4 x float>
|
||||
%y = fcmp fast one <vscale x 4 x float> %x, %x2
|
||||
ret <vscale x 4 x i1> %y
|
||||
}
|
||||
; fcmp oeq of %x against zeroinitializer; the expected output uses fcmeq with the #0.0 immediate operand.
define <vscale x 4 x i1> @oeq_zero(<vscale x 4 x float> %x) {
|
||||
; CHECK-LABEL: oeq_zero:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
|
||||
; CHECK-NEXT: ret
|
||||
%y = fcmp oeq <vscale x 4 x float> %x, zeroinitializer
|
||||
ret <vscale x 4 x i1> %y
|
||||
}
|
||||
; fcmp ogt of %x against zeroinitializer; the expected output uses fcmgt with the #0.0 immediate operand.
define <vscale x 4 x i1> @ogt_zero(<vscale x 4 x float> %x) {
|
||||
; CHECK-LABEL: ogt_zero:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, #0.0
|
||||
; CHECK-NEXT: ret
|
||||
%y = fcmp ogt <vscale x 4 x float> %x, zeroinitializer
|
||||
ret <vscale x 4 x i1> %y
|
||||
}
|
||||
; fcmp oge of %x against zeroinitializer; the expected output uses fcmge with the #0.0 immediate operand.
define <vscale x 4 x i1> @oge_zero(<vscale x 4 x float> %x) {
|
||||
; CHECK-LABEL: oge_zero:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: fcmge p0.s, p0/z, z0.s, #0.0
|
||||
; CHECK-NEXT: ret
|
||||
%y = fcmp oge <vscale x 4 x float> %x, zeroinitializer
|
||||
ret <vscale x 4 x i1> %y
|
||||
}
|
||||
; fcmp olt of %x against zeroinitializer; the expected output uses fcmlt with the #0.0 immediate operand.
define <vscale x 4 x i1> @olt_zero(<vscale x 4 x float> %x) {
|
||||
; CHECK-LABEL: olt_zero:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: fcmlt p0.s, p0/z, z0.s, #0.0
|
||||
; CHECK-NEXT: ret
|
||||
%y = fcmp olt <vscale x 4 x float> %x, zeroinitializer
|
||||
ret <vscale x 4 x i1> %y
|
||||
}
|
||||
; fcmp ole of %x against zeroinitializer; the expected output uses fcmle with the #0.0 immediate operand.
define <vscale x 4 x i1> @ole_zero(<vscale x 4 x float> %x) {
|
||||
; CHECK-LABEL: ole_zero:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: fcmle p0.s, p0/z, z0.s, #0.0
|
||||
; CHECK-NEXT: ret
|
||||
%y = fcmp ole <vscale x 4 x float> %x, zeroinitializer
|
||||
ret <vscale x 4 x i1> %y
|
||||
}
|
||||
; fcmp one of %x against zeroinitializer; the expected output uses fcmne with the #0.0 immediate operand.
define <vscale x 4 x i1> @one_zero(<vscale x 4 x float> %x) {
|
||||
; CHECK-LABEL: one_zero:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, #0.0
|
||||
; CHECK-NEXT: ret
|
||||
%y = fcmp one <vscale x 4 x float> %x, zeroinitializer
|
||||
ret <vscale x 4 x i1> %y
|
||||
}
|
||||
; fcmp ueq of %x against zeroinitializer; the expected output is fcmne with #0.0 followed by a predicated not of its result.
define <vscale x 4 x i1> @ueq_zero(<vscale x 4 x float> %x) {
|
||||
; CHECK-LABEL: ueq_zero:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, #0.0
|
||||
; CHECK-NEXT: not p0.b, p0/z, p1.b
|
||||
; CHECK-NEXT: ret
|
||||
%y = fcmp ueq <vscale x 4 x float> %x, zeroinitializer
|
||||
ret <vscale x 4 x i1> %y
|
||||
}
|
||||
; fcmp ugt of %x against zeroinitializer; the expected output is fcmle with #0.0 followed by a predicated not of its result.
define <vscale x 4 x i1> @ugt_zero(<vscale x 4 x float> %x) {
|
||||
; CHECK-LABEL: ugt_zero:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: fcmle p1.s, p0/z, z0.s, #0.0
|
||||
; CHECK-NEXT: not p0.b, p0/z, p1.b
|
||||
; CHECK-NEXT: ret
|
||||
%y = fcmp ugt <vscale x 4 x float> %x, zeroinitializer
|
||||
ret <vscale x 4 x i1> %y
|
||||
}
|
||||
; fcmp uge of %x against zeroinitializer; the expected output is fcmlt with #0.0 followed by a predicated not of its result.
define <vscale x 4 x i1> @uge_zero(<vscale x 4 x float> %x) {
|
||||
; CHECK-LABEL: uge_zero:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: fcmlt p1.s, p0/z, z0.s, #0.0
|
||||
; CHECK-NEXT: not p0.b, p0/z, p1.b
|
||||
; CHECK-NEXT: ret
|
||||
%y = fcmp uge <vscale x 4 x float> %x, zeroinitializer
|
||||
ret <vscale x 4 x i1> %y
|
||||
}
|
||||
; fcmp ult of %x against zeroinitializer; the expected output is fcmge with #0.0 followed by a predicated not of its result.
define <vscale x 4 x i1> @ult_zero(<vscale x 4 x float> %x) {
|
||||
; CHECK-LABEL: ult_zero:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0
|
||||
; CHECK-NEXT: not p0.b, p0/z, p1.b
|
||||
; CHECK-NEXT: ret
|
||||
%y = fcmp ult <vscale x 4 x float> %x, zeroinitializer
|
||||
ret <vscale x 4 x i1> %y
|
||||
}
|
||||
; fcmp ule of %x against zeroinitializer; the expected output is fcmgt with #0.0 followed by a predicated not of its result.
define <vscale x 4 x i1> @ule_zero(<vscale x 4 x float> %x) {
|
||||
; CHECK-LABEL: ule_zero:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, #0.0
|
||||
; CHECK-NEXT: not p0.b, p0/z, p1.b
|
||||
; CHECK-NEXT: ret
|
||||
%y = fcmp ule <vscale x 4 x float> %x, zeroinitializer
|
||||
ret <vscale x 4 x i1> %y
|
||||
}
|
||||
; fcmp une of %x against zeroinitializer; the expected output is fcmeq with #0.0 followed by a predicated not of its result.
define <vscale x 4 x i1> @une_zero(<vscale x 4 x float> %x) {
|
||||
; CHECK-LABEL: une_zero:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0
|
||||
; CHECK-NEXT: not p0.b, p0/z, p1.b
|
||||
; CHECK-NEXT: ret
|
||||
%y = fcmp une <vscale x 4 x float> %x, zeroinitializer
|
||||
ret <vscale x 4 x i1> %y
|
||||
}
|
||||
|
@ -617,8 +617,7 @@ define void @masked_gather_v16f16(<16 x half>* %a, <16 x half*>* %b) #0 {
|
||||
; VBITS_GE_1024-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_1024-NEXT: ptrue [[PG1:p[0-9]+]].d, vl16
|
||||
; VBITS_GE_1024-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1]
|
||||
; VBITS_GE_1024-NEXT: mov [[ZERO:z[0-9]+]].h, #0
|
||||
; VBITS_GE_1024-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]].h
|
||||
; VBITS_GE_1024-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, #0.0
|
||||
; VBITS_GE_1024-NEXT: ld1h { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d]
|
||||
; VBITS_GE_1024-NEXT: uzp1 [[UZP1:z[0-9]+]].s, [[RES]].s, [[RES]].s
|
||||
; VBITS_GE_1024-NEXT: uzp1 [[UZP2:z[0-9]+]].h, [[UZP1]].h, [[UZP1]].h
|
||||
@ -638,8 +637,7 @@ define void @masked_gather_v32f16(<32 x half>* %a, <32 x half*>* %b) #0 {
|
||||
; VBITS_GE_2048-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32
|
||||
; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1]
|
||||
; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].h, #0
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]].h
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, #0.0
|
||||
; VBITS_GE_2048-NEXT: ld1h { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d]
|
||||
; VBITS_GE_2048-NEXT: uzp1 [[UZP1:z[0-9]+]].s, [[RES]].s, [[RES]].s
|
||||
; VBITS_GE_2048-NEXT: uzp1 [[UZP2:z[0-9]+]].h, [[UZP1]].h, [[UZP1]].h
|
||||
@ -702,8 +700,7 @@ define void @masked_gather_v8f32(<8 x float>* %a, <8 x float*>* %b) #0 {
|
||||
; VBITS_GE_512-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_512-NEXT: ptrue [[PG1:p[0-9]+]].d, vl8
|
||||
; VBITS_GE_512-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1]
|
||||
; VBITS_GE_512-NEXT: mov [[ZERO:z[0-9]+]].s, #0
|
||||
; VBITS_GE_512-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]].s
|
||||
; VBITS_GE_512-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0.0
|
||||
; VBITS_GE_512-NEXT: ld1w { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d]
|
||||
; VBITS_GE_512-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[RES]].s, [[RES]].s
|
||||
; VBITS_GE_512-NEXT: st1w { [[UZP]].s }, [[PG0]], [x0]
|
||||
@ -722,8 +719,7 @@ define void @masked_gather_v16f32(<16 x float>* %a, <16 x float*>* %b) #0 {
|
||||
; VBITS_GE_1024-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_1024-NEXT: ptrue [[PG1:p[0-9]+]].d, vl16
|
||||
; VBITS_GE_1024-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1]
|
||||
; VBITS_GE_1024-NEXT: mov [[ZERO:z[0-9]+]].s, #0
|
||||
; VBITS_GE_1024-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]].s
|
||||
; VBITS_GE_1024-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0.0
|
||||
; VBITS_GE_1024-NEXT: ld1w { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d]
|
||||
; VBITS_GE_1024-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[RES]].s, [[RES]].s
|
||||
; VBITS_GE_1024-NEXT: st1w { [[UZP]].s }, [[PG0]], [x0]
|
||||
@ -742,8 +738,7 @@ define void @masked_gather_v32f32(<32 x float>* %a, <32 x float*>* %b) #0 {
|
||||
; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32
|
||||
; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1]
|
||||
; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]].s
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0.0
|
||||
; VBITS_GE_2048-NEXT: ld1w { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d]
|
||||
; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[RES]].s, [[RES]].s
|
||||
; VBITS_GE_2048-NEXT: st1w { [[UZP]].s }, [[PG0]], [x0]
|
||||
@ -795,8 +790,7 @@ define void @masked_gather_v4f64(<4 x double>* %a, <4 x double*>* %b) #0 {
|
||||
; CHECK: ptrue [[PG0:p[0-9]+]].d, vl4
|
||||
; CHECK-NEXT: ld1d { [[VALS:z[0-9]+]].d }, [[PG0]]/z, [x0]
|
||||
; CHECK-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1]
|
||||
; CHECK-NEXT: mov [[ZERO:z[0-9]+]].d, #0
|
||||
; CHECK-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, [[ZERO]].d
|
||||
; CHECK-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, #0.0
|
||||
; CHECK-NEXT: ld1d { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d]
|
||||
; CHECK-NEXT: st1d { [[RES]].d }, [[PG0]], [x0]
|
||||
; CHECK-NEXT: ret
|
||||
@ -813,8 +807,7 @@ define void @masked_gather_v8f64(<8 x double>* %a, <8 x double*>* %b) #0 {
|
||||
; VBITS_GE_512: ptrue [[PG0:p[0-9]+]].d, vl8
|
||||
; VBITS_GE_512-NEXT: ld1d { [[VALS:z[0-9]+]].d }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_512-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1]
|
||||
; VBITS_GE_512-NEXT: mov [[ZERO:z[0-9]+]].d, #0
|
||||
; VBITS_GE_512-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, [[ZERO]].d
|
||||
; VBITS_GE_512-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, #0.0
|
||||
; VBITS_GE_512-NEXT: ld1d { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d]
|
||||
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG0]], [x0]
|
||||
; VBITS_GE_512-NEXT: ret
|
||||
@ -831,8 +824,7 @@ define void @masked_gather_v16f64(<16 x double>* %a, <16 x double*>* %b) #0 {
|
||||
; VBITS_GE_1024: ptrue [[PG0:p[0-9]+]].d, vl16
|
||||
; VBITS_GE_1024-NEXT: ld1d { [[VALS:z[0-9]+]].d }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_1024-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1]
|
||||
; VBITS_GE_1024-NEXT: mov [[ZERO:z[0-9]+]].d, #0
|
||||
; VBITS_GE_1024-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, [[ZERO]].d
|
||||
; VBITS_GE_1024-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, #0.0
|
||||
; VBITS_GE_1024-NEXT: ld1d { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d]
|
||||
; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG0]], [x0]
|
||||
; VBITS_GE_1024-NEXT: ret
|
||||
@ -849,8 +841,7 @@ define void @masked_gather_v32f64(<32 x double>* %a, <32 x double*>* %b) #0 {
|
||||
; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].d, vl32
|
||||
; VBITS_GE_2048-NEXT: ld1d { [[VALS:z[0-9]+]].d }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1]
|
||||
; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].d, #0
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, [[ZERO]].d
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, #0.0
|
||||
; VBITS_GE_2048-NEXT: ld1d { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d]
|
||||
; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG0]], [x0]
|
||||
; VBITS_GE_2048-NEXT: ret
|
||||
@ -871,8 +862,7 @@ define void @masked_gather_32b_scaled_sext(<32 x half>* %a, <32 x i32>* %b, half
|
||||
; VBITS_GE_2048-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].s, vl32
|
||||
; VBITS_GE_2048-NEXT: ld1w { [[PTRS:z[0-9]+]].s }, [[PG1]]/z, [x1]
|
||||
; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].h, #0
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]]
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, #0.0
|
||||
; VBITS_GE_2048-NEXT: ld1h { [[RES:z[0-9]+]].s }, [[MASK]]/z, [x2, [[PTRS]].s, sxtw #1]
|
||||
; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].h, [[RES]].h, [[RES]].h
|
||||
; VBITS_GE_2048-NEXT: st1h { [[UZP]].h }, [[PG0]], [x0]
|
||||
@ -893,8 +883,7 @@ define void @masked_gather_32b_scaled_zext(<32 x half>* %a, <32 x i32>* %b, half
|
||||
; VBITS_GE_2048-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].s, vl32
|
||||
; VBITS_GE_2048-NEXT: ld1w { [[PTRS:z[0-9]+]].s }, [[PG1]]/z, [x1]
|
||||
; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].h, #0
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]]
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, #0.0
|
||||
; VBITS_GE_2048-NEXT: ld1h { [[RES:z[0-9]+]].s }, [[MASK]]/z, [x2, [[PTRS]].s, uxtw #1]
|
||||
; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].h, [[RES]].h, [[RES]].h
|
||||
; VBITS_GE_2048-NEXT: st1h { [[UZP]].h }, [[PG0]], [x0]
|
||||
@ -915,8 +904,7 @@ define void @masked_gather_32b_unscaled_sext(<32 x half>* %a, <32 x i32>* %b, i8
|
||||
; VBITS_GE_2048-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].s, vl32
|
||||
; VBITS_GE_2048-NEXT: ld1w { [[PTRS:z[0-9]+]].s }, [[PG1]]/z, [x1]
|
||||
; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].h, #0
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]]
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, #0.0
|
||||
; VBITS_GE_2048-NEXT: ld1h { [[RES:z[0-9]+]].s }, [[MASK]]/z, [x2, [[PTRS]].s, sxtw]
|
||||
; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].h, [[RES]].h, [[RES]].h
|
||||
; VBITS_GE_2048-NEXT: st1h { [[UZP]].h }, [[PG0]], [x0]
|
||||
@ -938,8 +926,7 @@ define void @masked_gather_32b_unscaled_zext(<32 x half>* %a, <32 x i32>* %b, i8
|
||||
; VBITS_GE_2048-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].s, vl32
|
||||
; VBITS_GE_2048-NEXT: ld1w { [[PTRS:z[0-9]+]].s }, [[PG1]]/z, [x1]
|
||||
; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].h, #0
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]]
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, #0.0
|
||||
; VBITS_GE_2048-NEXT: ld1h { [[RES:z[0-9]+]].s }, [[MASK]]/z, [x2, [[PTRS]].s, uxtw]
|
||||
; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].h, [[RES]].h, [[RES]].h
|
||||
; VBITS_GE_2048-NEXT: st1h { [[UZP]].h }, [[PG0]], [x0]
|
||||
@ -961,8 +948,7 @@ define void @masked_gather_64b_scaled(<32 x float>* %a, <32 x i64>* %b, float* %
|
||||
; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32
|
||||
; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1]
|
||||
; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]]
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0.0
|
||||
; VBITS_GE_2048-NEXT: ld1w { [[RES:z[0-9]+]].d }, [[MASK]]/z, [x2, [[PTRS]].d, lsl #2]
|
||||
; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[RES]].s, [[RES]].s
|
||||
; VBITS_GE_2048-NEXT: st1w { [[UZP]].s }, [[PG0]], [x0]
|
||||
@ -982,8 +968,7 @@ define void @masked_gather_64b_unscaled(<32 x float>* %a, <32 x i64>* %b, i8* %b
|
||||
; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32
|
||||
; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1]
|
||||
; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]]
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0.0
|
||||
; VBITS_GE_2048-NEXT: ld1w { [[RES:z[0-9]+]].d }, [[MASK]]/z, [x2, [[PTRS]].d]
|
||||
; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[RES]].s, [[RES]].s
|
||||
; VBITS_GE_2048-NEXT: st1w { [[UZP]].s }, [[PG0]], [x0]
|
||||
@ -1006,9 +991,8 @@ define void @masked_gather_vec_plus_reg(<32 x float>* %a, <32 x i8*>* %b, i64 %o
|
||||
; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1]
|
||||
; VBITS_GE_2048-NEXT: mov [[OFF:z[0-9]+]].d, x2
|
||||
; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0
|
||||
; VBITS_GE_2048-NEXT: add [[PTRS_ADD:z[0-9]+]].d, [[PG1]]/m, [[PTRS]].d, [[OFF]].d
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]]
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0.0
|
||||
; VBITS_GE_2048-NEXT: ld1w { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS_ADD]].d]
|
||||
; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[RES]].s, [[RES]].s
|
||||
; VBITS_GE_2048-NEXT: st1w { [[UZP]].s }, [[PG0]], [x0]
|
||||
@ -1031,9 +1015,8 @@ define void @masked_gather_vec_plus_imm(<32 x float>* %a, <32 x i8*>* %b) #0 {
|
||||
; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1]
|
||||
; VBITS_GE_2048-NEXT: mov [[OFF:z[0-9]+]].d, #4
|
||||
; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0
|
||||
; VBITS_GE_2048-NEXT: add [[PTRS_ADD:z[0-9]+]].d, [[PG1]]/m, [[PTRS]].d, [[OFF]].d
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]]
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0.0
|
||||
; VBITS_GE_2048-NEXT: ld1w { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS_ADD]].d]
|
||||
; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[RES]].s, [[RES]].s
|
||||
; VBITS_GE_2048-NEXT: st1w { [[UZP]].s }, [[PG0]], [x0]
|
||||
@ -1054,9 +1037,8 @@ define void @masked_gather_passthru(<32 x float>* %a, <32 x float*>* %b, <32 x f
|
||||
; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32
|
||||
; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1]
|
||||
; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]]
|
||||
; VBITS_GE_2048-NEXT: ld1w { [[PT:z[0-9]+]].s }, [[PG0]]/z, [x2]
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0.0
|
||||
; VBITS_GE_2048-NEXT: ld1w { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d]
|
||||
; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[RES]].s, [[RES]].s
|
||||
; VBITS_GE_2048-NEXT: sel [[SEL:z[0-9]+]].s, [[PG1]], [[UZP]].s, [[PT]].s
|
||||
@ -1077,8 +1059,7 @@ define void @masked_gather_passthru_0(<32 x float>* %a, <32 x float*>* %b) #0 {
|
||||
; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32
|
||||
; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1]
|
||||
; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]]
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0.0
|
||||
; VBITS_GE_2048-NEXT: ld1w { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d]
|
||||
; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[RES]].s, [[RES]].s
|
||||
; VBITS_GE_2048-NEXT: st1w { [[UZP]].s }, [[PG0]], [x0]
|
||||
|
@ -562,8 +562,7 @@ define void @masked_scatter_v16f16(<16 x half>* %a, <16 x half*>* %b) #0 {
|
||||
; VBITS_GE_1024-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_1024-NEXT: ptrue [[PG1:p[0-9]+]].d, vl16
|
||||
; VBITS_GE_1024-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1]
|
||||
; VBITS_GE_1024-NEXT: mov [[ZERO:z[0-9]+]].h, #0
|
||||
; VBITS_GE_1024-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]]
|
||||
; VBITS_GE_1024-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, #0.0
|
||||
; VBITS_GE_1024-NEXT: uunpklo [[UPK1:z[0-9]+]].s, [[VALS]].h
|
||||
; VBITS_GE_1024-NEXT: uunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s
|
||||
; VBITS_GE_1024-NEXT: st1h { [[UPK2]].d }, [[MASK]], {{\[}}[[PTRS]].d]
|
||||
@ -581,8 +580,7 @@ define void @masked_scatter_v32f16(<32 x half>* %a, <32 x half*>* %b) #0 {
|
||||
; VBITS_GE_2048-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32
|
||||
; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1]
|
||||
; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].h, #0
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]]
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, #0.0
|
||||
; VBITS_GE_2048-NEXT: uunpklo [[UPK1:z[0-9]+]].s, [[VALS]].h
|
||||
; VBITS_GE_2048-NEXT: uunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s
|
||||
; VBITS_GE_2048-NEXT: st1h { [[UPK2]].d }, [[MASK]], {{\[}}[[PTRS]].d]
|
||||
@ -639,8 +637,7 @@ define void @masked_scatter_v8f32(<8 x float>* %a, <8 x float*>* %b) #0 {
|
||||
; VBITS_GE_512-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_512-NEXT: ptrue [[PG1:p[0-9]+]].d, vl8
|
||||
; VBITS_GE_512-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1]
|
||||
; VBITS_GE_512-NEXT: mov [[ZERO:z[0-9]+]].s, #0
|
||||
; VBITS_GE_512-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]]
|
||||
; VBITS_GE_512-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0.0
|
||||
; VBITS_GE_512-NEXT: uunpklo [[UPK:z[0-9]+]].d, [[VALS]].s
|
||||
; VBITS_GE_512-NEXT: st1w { [[UPK]].d }, [[MASK]], {{\[}}[[PTRS]].d]
|
||||
; VBITS_GE_512-NEXT: ret
|
||||
@ -657,8 +654,7 @@ define void @masked_scatter_v16f32(<16 x float>* %a, <16 x float*>* %b) #0 {
|
||||
; VBITS_GE_1024-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_1024-NEXT: ptrue [[PG1:p[0-9]+]].d, vl16
|
||||
; VBITS_GE_1024-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1]
|
||||
; VBITS_GE_1024-NEXT: mov [[ZERO:z[0-9]+]].s, #0
|
||||
; VBITS_GE_1024-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]]
|
||||
; VBITS_GE_1024-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0.0
|
||||
; VBITS_GE_1024-NEXT: uunpklo [[UPK:z[0-9]+]].d, [[VALS]].s
|
||||
; VBITS_GE_1024-NEXT: st1w { [[UPK]].d }, [[MASK]], {{\[}}[[PTRS]].d]
|
||||
; VBITS_GE_1024-NEXT: ret
|
||||
@ -675,8 +671,7 @@ define void @masked_scatter_v32f32(<32 x float>* %a, <32 x float*>* %b) #0 {
|
||||
; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32
|
||||
; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1]
|
||||
; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]]
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0.0
|
||||
; VBITS_GE_2048-NEXT: uunpklo [[UPK:z[0-9]+]].d, [[VALS]].s
|
||||
; VBITS_GE_2048-NEXT: st1w { [[UPK]].d }, [[MASK]], {{\[}}[[PTRS]].d]
|
||||
; VBITS_GE_2048-NEXT: ret
|
||||
@ -723,8 +718,7 @@ define void @masked_scatter_v4f64(<4 x double>* %a, <4 x double*>* %b) #0 {
|
||||
; CHECK: ptrue [[PG0:p[0-9]+]].d, vl4
|
||||
; CHECK-NEXT: ld1d { [[VALS:z[0-9]+]].d }, [[PG0]]/z, [x0]
|
||||
; CHECK-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1]
|
||||
; CHECK-NEXT: mov [[ZERO:z[0-9]+]].d, #0
|
||||
; CHECK-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, [[ZERO]].d
|
||||
; CHECK-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, #0.0
|
||||
; CHECK-NEXT: st1d { [[VALS]].d }, [[MASK]], {{\[}}[[PTRS]].d]
|
||||
; CHECK-NEXT: ret
|
||||
%vals = load <4 x double>, <4 x double>* %a
|
||||
@ -739,8 +733,7 @@ define void @masked_scatter_v8f64(<8 x double>* %a, <8 x double*>* %b) #0 {
|
||||
; VBITS_GE_512: ptrue [[PG0:p[0-9]+]].d, vl8
|
||||
; VBITS_GE_512-NEXT: ld1d { [[VALS:z[0-9]+]].d }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_512-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1]
|
||||
; VBITS_GE_512-NEXT: mov [[ZERO:z[0-9]+]].d, #0
|
||||
; VBITS_GE_512-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, [[ZERO]]
|
||||
; VBITS_GE_512-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, #0.0
|
||||
; VBITS_GE_512-NEXT: st1d { [[VALS]].d }, [[MASK]], {{\[}}[[PTRS]].d]
|
||||
; VBITS_GE_512-NEXT: ret
|
||||
%vals = load <8 x double>, <8 x double>* %a
|
||||
@ -755,8 +748,7 @@ define void @masked_scatter_v16f64(<16 x double>* %a, <16 x double*>* %b) #0 {
|
||||
; VBITS_GE_1024: ptrue [[PG0:p[0-9]+]].d, vl16
|
||||
; VBITS_GE_1024-NEXT: ld1d { [[VALS:z[0-9]+]].d }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_1024-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1]
|
||||
; VBITS_GE_1024-NEXT: mov [[ZERO:z[0-9]+]].d, #0
|
||||
; VBITS_GE_1024-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, [[ZERO]]
|
||||
; VBITS_GE_1024-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, #0.0
|
||||
; VBITS_GE_1024-NEXT: st1d { [[VALS]].d }, [[MASK]], {{\[}}[[PTRS]].d]
|
||||
; VBITS_GE_1024-NEXT: ret
|
||||
%vals = load <16 x double>, <16 x double>* %a
|
||||
@ -771,8 +763,7 @@ define void @masked_scatter_v32f64(<32 x double>* %a, <32 x double*>* %b) #0 {
|
||||
; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].d, vl32
|
||||
; VBITS_GE_2048-NEXT: ld1d { [[VALS:z[0-9]+]].d }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1]
|
||||
; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].d, #0
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, [[ZERO]]
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, #0.0
|
||||
; VBITS_GE_2048-NEXT: st1d { [[VALS]].d }, [[MASK]], {{\[}}[[PTRS]].d]
|
||||
; VBITS_GE_2048-NEXT: ret
|
||||
%vals = load <32 x double>, <32 x double>* %a
|
||||
@ -791,8 +782,7 @@ define void @masked_scatter_32b_scaled_sext(<32 x half>* %a, <32 x i32>* %b, hal
|
||||
; VBITS_GE_2048-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].s, vl32
|
||||
; VBITS_GE_2048-NEXT: ld1w { [[PTRS:z[0-9]+]].s }, [[PG1]]/z, [x1]
|
||||
; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].h, #0
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]]
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, #0.0
|
||||
; VBITS_GE_2048-NEXT: uunpklo [[UPK:z[0-9]+]].s, [[VALS]].h
|
||||
; VBITS_GE_2048-NEXT: st1h { [[VALS]].s }, [[MASK]], [x2, [[PTRS]].s, sxtw #1]
|
||||
; VBITS_GE_2048-NEXT: ret
|
||||
@ -811,8 +801,7 @@ define void @masked_scatter_32b_scaled_zext(<32 x half>* %a, <32 x i32>* %b, hal
|
||||
; VBITS_GE_2048-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].s, vl32
|
||||
; VBITS_GE_2048-NEXT: ld1w { [[PTRS:z[0-9]+]].s }, [[PG1]]/z, [x1]
|
||||
; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].h, #0
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]]
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, #0.0
|
||||
; VBITS_GE_2048-NEXT: uunpklo [[UPK:z[0-9]+]].s, [[VALS]].h
|
||||
; VBITS_GE_2048-NEXT: st1h { [[VALS]].s }, [[MASK]], [x2, [[PTRS]].s, uxtw #1]
|
||||
; VBITS_GE_2048-NEXT: ret
|
||||
@ -831,8 +820,7 @@ define void @masked_scatter_32b_unscaled_sext(<32 x half>* %a, <32 x i32>* %b, i
|
||||
; VBITS_GE_2048-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].s, vl32
|
||||
; VBITS_GE_2048-NEXT: ld1w { [[PTRS:z[0-9]+]].s }, [[PG1]]/z, [x1]
|
||||
; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].h, #0
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]]
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, #0.0
|
||||
; VBITS_GE_2048-NEXT: uunpklo [[UPK:z[0-9]+]].s, [[VALS]].h
|
||||
; VBITS_GE_2048-NEXT: st1h { [[VALS]].s }, [[MASK]], [x2, [[PTRS]].s, sxtw]
|
||||
; VBITS_GE_2048-NEXT: ret
|
||||
@ -852,8 +840,7 @@ define void @masked_scatter_32b_unscaled_zext(<32 x half>* %a, <32 x i32>* %b, i
|
||||
; VBITS_GE_2048-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].s, vl32
|
||||
; VBITS_GE_2048-NEXT: ld1w { [[PTRS:z[0-9]+]].s }, [[PG1]]/z, [x1]
|
||||
; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].h, #0
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]]
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, #0.0
|
||||
; VBITS_GE_2048-NEXT: uunpklo [[UPK:z[0-9]+]].s, [[VALS]].h
|
||||
; VBITS_GE_2048-NEXT: st1h { [[VALS]].s }, [[MASK]], [x2, [[PTRS]].s, uxtw]
|
||||
; VBITS_GE_2048-NEXT: ret
|
||||
@ -873,8 +860,7 @@ define void @masked_scatter_64b_scaled(<32 x float>* %a, <32 x i64>* %b, float*
|
||||
; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32
|
||||
; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1]
|
||||
; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]]
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0.0
|
||||
; VBITS_GE_2048-NEXT: uunpklo [[UPK:z[0-9]+]].d, [[VALS]].s
|
||||
; VBITS_GE_2048-NEXT: st1w { [[VALS]].d }, [[MASK]], [x2, [[PTRS]].d, lsl #2]
|
||||
; VBITS_GE_2048-NEXT: ret
|
||||
@ -892,8 +878,7 @@ define void @masked_scatter_64b_unscaled(<32 x float>* %a, <32 x i64>* %b, i8* %
|
||||
; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32
|
||||
; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1]
|
||||
; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]]
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0.0
|
||||
; VBITS_GE_2048-NEXT: uunpklo [[UPK:z[0-9]+]].d, [[VALS]].s
|
||||
; VBITS_GE_2048-NEXT: st1w { [[VALS]].d }, [[MASK]], [x2, [[PTRS]].d]
|
||||
; VBITS_GE_2048-NEXT: ret
|
||||
@ -914,8 +899,7 @@ define void @masked_scatter_vec_plus_reg(<32 x float>* %a, <32 x i8*>* %b, i64 %
|
||||
; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1]
|
||||
; VBITS_GE_2048-NEXT: mov [[OFF:z[0-9]+]].d, x2
|
||||
; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]]
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0.0
|
||||
; VBITS_GE_2048-NEXT: add [[PTRS_ADD:z[0-9]+]].d, [[PG1]]/m, [[PTRS]].d, [[OFF]].d
|
||||
; VBITS_GE_2048-NEXT: uunpklo [[UPK:z[0-9]+]].d, [[VALS]].s
|
||||
; VBITS_GE_2048-NEXT: st1w { [[VALS]].d }, [[MASK]], {{\[}}[[PTRS_ADD]].d]
|
||||
@ -937,8 +921,7 @@ define void @masked_scatter_vec_plus_imm(<32 x float>* %a, <32 x i8*>* %b) #0 {
|
||||
; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0]
|
||||
; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1]
|
||||
; VBITS_GE_2048-NEXT: mov [[OFF:z[0-9]+]].d, #4
|
||||
; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]]
|
||||
; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0.0
|
||||
; VBITS_GE_2048-NEXT: add [[PTRS_ADD:z[0-9]+]].d, [[PG1]]/m, [[PTRS]].d, [[OFF]].d
|
||||
; VBITS_GE_2048-NEXT: uunpklo [[UPK:z[0-9]+]].d, [[VALS]].s
|
||||
; VBITS_GE_2048-NEXT: st1w { [[VALS]].d }, [[MASK]], {{\[}}[[PTRS_ADD]].d]
|
||||
|
@ -1,3 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
|
||||
|
||||
;
|
||||
@ -6,7 +7,8 @@
|
||||
|
||||
define <vscale x 8 x i1> @facge_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
|
||||
; CHECK-LABEL: facge_h:
|
||||
; CHECK: facge p0.h, p0/z, z0.h, z1.h
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: facge p0.h, p0/z, z0.h, z1.h
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 8 x i1> @llvm.aarch64.sve.facge.nxv8f16(<vscale x 8 x i1> %pg,
|
||||
<vscale x 8 x half> %a,
|
||||
@ -16,7 +18,8 @@ define <vscale x 8 x i1> @facge_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a,
|
||||
|
||||
define <vscale x 4 x i1> @facge_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
|
||||
; CHECK-LABEL: facge_s:
|
||||
; CHECK: facge p0.s, p0/z, z0.s, z1.s
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: facge p0.s, p0/z, z0.s, z1.s
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 4 x i1> @llvm.aarch64.sve.facge.nxv4f32(<vscale x 4 x i1> %pg,
|
||||
<vscale x 4 x float> %a,
|
||||
@ -26,7 +29,8 @@ define <vscale x 4 x i1> @facge_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a
|
||||
|
||||
define <vscale x 2 x i1> @facge_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
|
||||
; CHECK-LABEL: facge_d:
|
||||
; CHECK: facge p0.d, p0/z, z0.d, z1.d
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: facge p0.d, p0/z, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 2 x i1> @llvm.aarch64.sve.facge.nxv2f64(<vscale x 2 x i1> %pg,
|
||||
<vscale x 2 x double> %a,
|
||||
@ -40,7 +44,8 @@ define <vscale x 2 x i1> @facge_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %
|
||||
|
||||
define <vscale x 8 x i1> @facgt_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
|
||||
; CHECK-LABEL: facgt_h:
|
||||
; CHECK: facgt p0.h, p0/z, z0.h, z1.h
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: facgt p0.h, p0/z, z0.h, z1.h
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 8 x i1> @llvm.aarch64.sve.facgt.nxv8f16(<vscale x 8 x i1> %pg,
|
||||
<vscale x 8 x half> %a,
|
||||
@ -50,7 +55,8 @@ define <vscale x 8 x i1> @facgt_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a,
|
||||
|
||||
define <vscale x 4 x i1> @facgt_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
|
||||
; CHECK-LABEL: facgt_s:
|
||||
; CHECK: facgt p0.s, p0/z, z0.s, z1.s
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: facgt p0.s, p0/z, z0.s, z1.s
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 4 x i1> @llvm.aarch64.sve.facgt.nxv4f32(<vscale x 4 x i1> %pg,
|
||||
<vscale x 4 x float> %a,
|
||||
@ -60,7 +66,8 @@ define <vscale x 4 x i1> @facgt_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a
|
||||
|
||||
define <vscale x 2 x i1> @facgt_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
|
||||
; CHECK-LABEL: facgt_d:
|
||||
; CHECK: facgt p0.d, p0/z, z0.d, z1.d
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: facgt p0.d, p0/z, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 2 x i1> @llvm.aarch64.sve.facgt.nxv2f64(<vscale x 2 x i1> %pg,
|
||||
<vscale x 2 x double> %a,
|
||||
@ -74,7 +81,8 @@ define <vscale x 2 x i1> @facgt_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %
|
||||
|
||||
define <vscale x 8 x i1> @fcmeq_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
|
||||
; CHECK-LABEL: fcmeq_h:
|
||||
; CHECK: fcmeq p0.h, p0/z, z0.h, z1.h
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, z1.h
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 8 x i1> @llvm.aarch64.sve.fcmpeq.nxv8f16(<vscale x 8 x i1> %pg,
|
||||
<vscale x 8 x half> %a,
|
||||
@ -84,7 +92,8 @@ define <vscale x 8 x i1> @fcmeq_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a,
|
||||
|
||||
define <vscale x 4 x i1> @fcmeq_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
|
||||
; CHECK-LABEL: fcmeq_s:
|
||||
; CHECK: fcmeq p0.s, p0/z, z0.s, z1.s
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 4 x i1> @llvm.aarch64.sve.fcmpeq.nxv4f32(<vscale x 4 x i1> %pg,
|
||||
<vscale x 4 x float> %a,
|
||||
@ -94,7 +103,8 @@ define <vscale x 4 x i1> @fcmeq_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a
|
||||
|
||||
define <vscale x 2 x i1> @fcmeq_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
|
||||
; CHECK-LABEL: fcmeq_d:
|
||||
; CHECK: fcmeq p0.d, p0/z, z0.d, z1.d
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fcmeq p0.d, p0/z, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 2 x i1> @llvm.aarch64.sve.fcmpeq.nxv2f64(<vscale x 2 x i1> %pg,
|
||||
<vscale x 2 x double> %a,
|
||||
@ -102,13 +112,25 @@ define <vscale x 2 x i1> @fcmeq_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %
|
||||
ret <vscale x 2 x i1> %out
|
||||
}
|
||||
|
||||
define <vscale x 2 x i1> @fcmeq_zero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
|
||||
; CHECK-LABEL: fcmeq_zero:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fcmeq p0.d, p0/z, z0.d, #0.0
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 2 x i1> @llvm.aarch64.sve.fcmpeq.nxv2f64(<vscale x 2 x i1> %pg,
|
||||
<vscale x 2 x double> %a,
|
||||
<vscale x 2 x double> zeroinitializer)
|
||||
ret <vscale x 2 x i1> %out
|
||||
}
|
||||
|
||||
;
|
||||
; FCMGE
|
||||
;
|
||||
|
||||
define <vscale x 8 x i1> @fcmge_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
|
||||
; CHECK-LABEL: fcmge_h:
|
||||
; CHECK: fcmge p0.h, p0/z, z0.h, z1.h
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fcmge p0.h, p0/z, z0.h, z1.h
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 8 x i1> @llvm.aarch64.sve.fcmpge.nxv8f16(<vscale x 8 x i1> %pg,
|
||||
<vscale x 8 x half> %a,
|
||||
@ -118,7 +140,8 @@ define <vscale x 8 x i1> @fcmge_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a,
|
||||
|
||||
define <vscale x 4 x i1> @fcmge_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
|
||||
; CHECK-LABEL: fcmge_s:
|
||||
; CHECK: fcmge p0.s, p0/z, z0.s, z1.s
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fcmge p0.s, p0/z, z0.s, z1.s
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 4 x i1> @llvm.aarch64.sve.fcmpge.nxv4f32(<vscale x 4 x i1> %pg,
|
||||
<vscale x 4 x float> %a,
|
||||
@ -128,7 +151,8 @@ define <vscale x 4 x i1> @fcmge_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a
|
||||
|
||||
define <vscale x 2 x i1> @fcmge_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
|
||||
; CHECK-LABEL: fcmge_d:
|
||||
; CHECK: fcmge p0.d, p0/z, z0.d, z1.d
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fcmge p0.d, p0/z, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 2 x i1> @llvm.aarch64.sve.fcmpge.nxv2f64(<vscale x 2 x i1> %pg,
|
||||
<vscale x 2 x double> %a,
|
||||
@ -136,13 +160,24 @@ define <vscale x 2 x i1> @fcmge_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %
|
||||
ret <vscale x 2 x i1> %out
|
||||
}
|
||||
|
||||
define <vscale x 2 x i1> @fcmge_zero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
|
||||
; CHECK-LABEL: fcmge_zero:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fcmge p0.d, p0/z, z0.d, #0.0
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 2 x i1> @llvm.aarch64.sve.fcmpge.nxv2f64(<vscale x 2 x i1> %pg,
|
||||
<vscale x 2 x double> %a,
|
||||
<vscale x 2 x double> zeroinitializer)
|
||||
ret <vscale x 2 x i1> %out
|
||||
}
|
||||
;
|
||||
; FCMGT
|
||||
;
|
||||
|
||||
define <vscale x 8 x i1> @fcmgt_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
|
||||
; CHECK-LABEL: fcmgt_h:
|
||||
; CHECK: fcmgt p0.h, p0/z, z0.h, z1.h
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z1.h
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 8 x i1> @llvm.aarch64.sve.fcmpgt.nxv8f16(<vscale x 8 x i1> %pg,
|
||||
<vscale x 8 x half> %a,
|
||||
@ -152,7 +187,8 @@ define <vscale x 8 x i1> @fcmgt_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a,
|
||||
|
||||
define <vscale x 4 x i1> @fcmgt_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
|
||||
; CHECK-LABEL: fcmgt_s:
|
||||
; CHECK: fcmgt p0.s, p0/z, z0.s, z1.s
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 4 x i1> @llvm.aarch64.sve.fcmpgt.nxv4f32(<vscale x 4 x i1> %pg,
|
||||
<vscale x 4 x float> %a,
|
||||
@ -162,7 +198,8 @@ define <vscale x 4 x i1> @fcmgt_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a
|
||||
|
||||
define <vscale x 2 x i1> @fcmgt_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
|
||||
; CHECK-LABEL: fcmgt_d:
|
||||
; CHECK: fcmgt p0.d, p0/z, z0.d, z1.d
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 2 x i1> @llvm.aarch64.sve.fcmpgt.nxv2f64(<vscale x 2 x i1> %pg,
|
||||
<vscale x 2 x double> %a,
|
||||
@ -170,13 +207,24 @@ define <vscale x 2 x i1> @fcmgt_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %
|
||||
ret <vscale x 2 x i1> %out
|
||||
}
|
||||
|
||||
define <vscale x 2 x i1> @fcmgt_zero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
|
||||
; CHECK-LABEL: fcmgt_zero:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, #0.0
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 2 x i1> @llvm.aarch64.sve.fcmpgt.nxv2f64(<vscale x 2 x i1> %pg,
|
||||
<vscale x 2 x double> %a,
|
||||
<vscale x 2 x double> zeroinitializer)
|
||||
ret <vscale x 2 x i1> %out
|
||||
}
|
||||
;
|
||||
; FCMNE
|
||||
;
|
||||
|
||||
define <vscale x 8 x i1> @fcmne_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
|
||||
; CHECK-LABEL: fcmne_h:
|
||||
; CHECK: fcmne p0.h, p0/z, z0.h, z1.h
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fcmne p0.h, p0/z, z0.h, z1.h
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 8 x i1> @llvm.aarch64.sve.fcmpne.nxv8f16(<vscale x 8 x i1> %pg,
|
||||
<vscale x 8 x half> %a,
|
||||
@ -186,7 +234,8 @@ define <vscale x 8 x i1> @fcmne_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a,
|
||||
|
||||
define <vscale x 4 x i1> @fcmne_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
|
||||
; CHECK-LABEL: fcmne_s:
|
||||
; CHECK: fcmne p0.s, p0/z, z0.s, z1.s
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, z1.s
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 4 x i1> @llvm.aarch64.sve.fcmpne.nxv4f32(<vscale x 4 x i1> %pg,
|
||||
<vscale x 4 x float> %a,
|
||||
@ -196,7 +245,8 @@ define <vscale x 4 x i1> @fcmne_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a
|
||||
|
||||
define <vscale x 2 x i1> @fcmne_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
|
||||
; CHECK-LABEL: fcmne_d:
|
||||
; CHECK: fcmne p0.d, p0/z, z0.d, z1.d
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fcmne p0.d, p0/z, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 2 x i1> @llvm.aarch64.sve.fcmpne.nxv2f64(<vscale x 2 x i1> %pg,
|
||||
<vscale x 2 x double> %a,
|
||||
@ -204,13 +254,25 @@ define <vscale x 2 x i1> @fcmne_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %
|
||||
ret <vscale x 2 x i1> %out
|
||||
}
|
||||
|
||||
define <vscale x 2 x i1> @fcmne_zero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
|
||||
; CHECK-LABEL: fcmne_zero:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fcmne p0.d, p0/z, z0.d, #0.0
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 2 x i1> @llvm.aarch64.sve.fcmpne.nxv2f64(<vscale x 2 x i1> %pg,
|
||||
<vscale x 2 x double> %a,
|
||||
<vscale x 2 x double> zeroinitializer)
|
||||
ret <vscale x 2 x i1> %out
|
||||
}
|
||||
|
||||
;
|
||||
; FCMPUO
|
||||
;
|
||||
|
||||
define <vscale x 8 x i1> @fcmuo_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
|
||||
; CHECK-LABEL: fcmuo_h:
|
||||
; CHECK: fcmuo p0.h, p0/z, z0.h, z1.h
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z1.h
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 8 x i1> @llvm.aarch64.sve.fcmpuo.nxv8f16(<vscale x 8 x i1> %pg,
|
||||
<vscale x 8 x half> %a,
|
||||
@ -220,7 +282,8 @@ define <vscale x 8 x i1> @fcmuo_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a,
|
||||
|
||||
define <vscale x 4 x i1> @fcmuo_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
|
||||
; CHECK-LABEL: fcmuo_s:
|
||||
; CHECK: fcmuo p0.s, p0/z, z0.s, z1.s
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z1.s
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 4 x i1> @llvm.aarch64.sve.fcmpuo.nxv4f32(<vscale x 4 x i1> %pg,
|
||||
<vscale x 4 x float> %a,
|
||||
@ -230,7 +293,8 @@ define <vscale x 4 x i1> @fcmuo_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a
|
||||
|
||||
define <vscale x 2 x i1> @fcmuo_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
|
||||
; CHECK-LABEL: fcmuo_d:
|
||||
; CHECK: fcmuo p0.d, p0/z, z0.d, z1.d
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 2 x i1> @llvm.aarch64.sve.fcmpuo.nxv2f64(<vscale x 2 x i1> %pg,
|
||||
<vscale x 2 x double> %a,
|
||||
|
Loading…
x
Reference in New Issue
Block a user