mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[AArch64][SVE] Add "fast" fcmp operations.
dacf8d3 added support for most fcmp operations, but there are some extra variations I hadn't considered: besides the ordered and unordered condition codes, SelectionDAG supports float comparisons that are neither ordered nor unordered (SETEQ, SETNE, SETGT, SETGE, SETLT, SETLE), such as those produced for "fcmp fast". Add support for the missing operations.
Differential Revision: https://reviews.llvm.org/D84460
This commit is contained in:
parent
b8680170b1
commit
d2a7f965f0
@@ -230,7 +230,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
|
|||||||
MVT::nxv2f64 }) {
|
MVT::nxv2f64 }) {
|
||||||
setCondCodeAction(ISD::SETO, VT, Expand);
|
setCondCodeAction(ISD::SETO, VT, Expand);
|
||||||
setCondCodeAction(ISD::SETOLT, VT, Expand);
|
setCondCodeAction(ISD::SETOLT, VT, Expand);
|
||||||
|
setCondCodeAction(ISD::SETLT, VT, Expand);
|
||||||
setCondCodeAction(ISD::SETOLE, VT, Expand);
|
setCondCodeAction(ISD::SETOLE, VT, Expand);
|
||||||
|
setCondCodeAction(ISD::SETLE, VT, Expand);
|
||||||
setCondCodeAction(ISD::SETULT, VT, Expand);
|
setCondCodeAction(ISD::SETULT, VT, Expand);
|
||||||
setCondCodeAction(ISD::SETULE, VT, Expand);
|
setCondCodeAction(ISD::SETULE, VT, Expand);
|
||||||
setCondCodeAction(ISD::SETUGE, VT, Expand);
|
setCondCodeAction(ISD::SETUGE, VT, Expand);
|
||||||
|
@@ -210,6 +210,19 @@ def index_vector : SDNode<"AArch64ISD::INDEX_VECTOR", SDT_IndexVector, []>;
|
|||||||
|
|
||||||
def reinterpret_cast : SDNode<"AArch64ISD::REINTERPRET_CAST", SDTUnaryOp>;
|
def reinterpret_cast : SDNode<"AArch64ISD::REINTERPRET_CAST", SDTUnaryOp>;
|
||||||
|
|
||||||
|
def setoge_or_setge : PatFrags<(ops node:$lhs, node:$rhs),
|
||||||
|
[(setoge node:$lhs, node:$rhs),
|
||||||
|
(setge node:$lhs, node:$rhs)]>;
|
||||||
|
def setogt_or_setgt : PatFrags<(ops node:$lhs, node:$rhs),
|
||||||
|
[(setogt node:$lhs, node:$rhs),
|
||||||
|
(setgt node:$lhs, node:$rhs)]>;
|
||||||
|
def setoeq_or_seteq : PatFrags<(ops node:$lhs, node:$rhs),
|
||||||
|
[(setoeq node:$lhs, node:$rhs),
|
||||||
|
(seteq node:$lhs, node:$rhs)]>;
|
||||||
|
def setone_or_setne : PatFrags<(ops node:$lhs, node:$rhs),
|
||||||
|
[(setone node:$lhs, node:$rhs),
|
||||||
|
(setne node:$lhs, node:$rhs)]>;
|
||||||
|
|
||||||
let Predicates = [HasSVE] in {
|
let Predicates = [HasSVE] in {
|
||||||
defm RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
|
defm RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
|
||||||
def RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">;
|
def RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">;
|
||||||
@@ -1172,10 +1185,10 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
|
|||||||
defm CMPLO_PPzZI : sve_int_ucmp_vi<0b10, "cmplo", SETULT, SETUGT>;
|
defm CMPLO_PPzZI : sve_int_ucmp_vi<0b10, "cmplo", SETULT, SETUGT>;
|
||||||
defm CMPLS_PPzZI : sve_int_ucmp_vi<0b11, "cmpls", SETULE, SETUGE>;
|
defm CMPLS_PPzZI : sve_int_ucmp_vi<0b11, "cmpls", SETULE, SETUGE>;
|
||||||
|
|
||||||
defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", int_aarch64_sve_fcmpge, setoge>;
|
defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", int_aarch64_sve_fcmpge, setoge_or_setge>;
|
||||||
defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", int_aarch64_sve_fcmpgt, setogt>;
|
defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", int_aarch64_sve_fcmpgt, setogt_or_setgt>;
|
||||||
defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", int_aarch64_sve_fcmpeq, setoeq>;
|
defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", int_aarch64_sve_fcmpeq, setoeq_or_seteq>;
|
||||||
defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", int_aarch64_sve_fcmpne, setone>;
|
defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", int_aarch64_sve_fcmpne, setone_or_setne>;
|
||||||
defm FCMUO_PPzZZ : sve_fp_3op_p_pd_cc<0b100, "fcmuo", int_aarch64_sve_fcmpuo, setuo>;
|
defm FCMUO_PPzZZ : sve_fp_3op_p_pd_cc<0b100, "fcmuo", int_aarch64_sve_fcmpuo, setuo>;
|
||||||
defm FACGE_PPzZZ : sve_fp_3op_p_pd<0b101, "facge", int_aarch64_sve_facge>;
|
defm FACGE_PPzZZ : sve_fp_3op_p_pd<0b101, "facge", int_aarch64_sve_facge>;
|
||||||
defm FACGT_PPzZZ : sve_fp_3op_p_pd<0b111, "facgt", int_aarch64_sve_facgt>;
|
defm FACGT_PPzZZ : sve_fp_3op_p_pd<0b111, "facgt", int_aarch64_sve_facgt>;
|
||||||
|
@@ -257,3 +257,58 @@ define <vscale x 4 x i32> @oeq_4f32_zext(<vscale x 4 x float> %x, <vscale x 4 x
|
|||||||
%r = zext <vscale x 4 x i1> %y to <vscale x 4 x i32>
|
%r = zext <vscale x 4 x i1> %y to <vscale x 4 x i32>
|
||||||
ret <vscale x 4 x i32> %r
|
ret <vscale x 4 x i32> %r
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define <vscale x 4 x i1> @eq_fast(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
|
||||||
|
; CHECK-LABEL: eq_fast:
|
||||||
|
; CHECK: // %bb.0:
|
||||||
|
; CHECK-NEXT: ptrue p0.s
|
||||||
|
; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%y = fcmp fast oeq <vscale x 4 x float> %x, %x2
|
||||||
|
ret <vscale x 4 x i1> %y
|
||||||
|
}
|
||||||
|
define <vscale x 4 x i1> @gt_fast(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
|
||||||
|
; CHECK-LABEL: gt_fast:
|
||||||
|
; CHECK: // %bb.0:
|
||||||
|
; CHECK-NEXT: ptrue p0.s
|
||||||
|
; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%y = fcmp fast ogt <vscale x 4 x float> %x, %x2
|
||||||
|
ret <vscale x 4 x i1> %y
|
||||||
|
}
|
||||||
|
define <vscale x 4 x i1> @ge_fast(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
|
||||||
|
; CHECK-LABEL: ge_fast:
|
||||||
|
; CHECK: // %bb.0:
|
||||||
|
; CHECK-NEXT: ptrue p0.s
|
||||||
|
; CHECK-NEXT: fcmge p0.s, p0/z, z0.s, z1.s
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%y = fcmp fast oge <vscale x 4 x float> %x, %x2
|
||||||
|
ret <vscale x 4 x i1> %y
|
||||||
|
}
|
||||||
|
define <vscale x 4 x i1> @lt_fast(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
|
||||||
|
; CHECK-LABEL: lt_fast:
|
||||||
|
; CHECK: // %bb.0:
|
||||||
|
; CHECK-NEXT: ptrue p0.s
|
||||||
|
; CHECK-NEXT: fcmgt p0.s, p0/z, z1.s, z0.s
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%y = fcmp fast olt <vscale x 4 x float> %x, %x2
|
||||||
|
ret <vscale x 4 x i1> %y
|
||||||
|
}
|
||||||
|
define <vscale x 4 x i1> @le_fast(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
|
||||||
|
; CHECK-LABEL: le_fast:
|
||||||
|
; CHECK: // %bb.0:
|
||||||
|
; CHECK-NEXT: ptrue p0.s
|
||||||
|
; CHECK-NEXT: fcmge p0.s, p0/z, z1.s, z0.s
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%y = fcmp fast ole <vscale x 4 x float> %x, %x2
|
||||||
|
ret <vscale x 4 x i1> %y
|
||||||
|
}
|
||||||
|
define <vscale x 4 x i1> @ne_fast(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
|
||||||
|
; CHECK-LABEL: ne_fast:
|
||||||
|
; CHECK: // %bb.0:
|
||||||
|
; CHECK-NEXT: ptrue p0.s
|
||||||
|
; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, z1.s
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%y = fcmp fast one <vscale x 4 x float> %x, %x2
|
||||||
|
ret <vscale x 4 x i1> %y
|
||||||
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user