From f00c577e2d2e555e049027a959ae46c4b243ed75 Mon Sep 17 00:00:00 2001 From: LemonBoy Date: Fri, 5 Mar 2021 16:46:10 +0100 Subject: [PATCH] [LegalizeDAG] Implement promotion rules for SELECT_CC Implement the promotion rule for SELECT_CC nodes by upcasting all the parameters and downcasting the result. The AArch64 target makes use of this rule and, since it was not implemented, in some cases the instruction selector would hit an assertion upon encountering the illegal node. This patch requires D97840, the included test cases hit both problems. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D97859 --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 44 +++- .../AArch64/vecreduce-fmax-legalization.ll | 216 ++++++++++++++++++ .../AArch64/vecreduce-fmin-legalization.ll | 216 ++++++++++++++++++ 3 files changed, 474 insertions(+), 2 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 8534fcd60e5..a2d5c528b59 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -4518,11 +4518,12 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Node->getOpcode() == ISD::STRICT_FSETCC || Node->getOpcode() == ISD::STRICT_FSETCCS) OVT = Node->getOperand(1).getSimpleValueType(); - if (Node->getOpcode() == ISD::BR_CC) + if (Node->getOpcode() == ISD::BR_CC || + Node->getOpcode() == ISD::SELECT_CC) OVT = Node->getOperand(2).getSimpleValueType(); MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); SDLoc dl(Node); - SDValue Tmp1, Tmp2, Tmp3; + SDValue Tmp1, Tmp2, Tmp3, Tmp4; switch (Node->getOpcode()) { case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: @@ -4714,6 +4715,45 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Results.push_back(Tmp1); break; } + + case ISD::SELECT_CC: { + SDValue Cond = Node->getOperand(4); + ISD::CondCode CCCode = cast<CondCodeSDNode>(Cond)->get(); + // Type of the comparison operands. 
+ MVT CVT = Node->getSimpleValueType(0); + assert(CVT == OVT && "not handled"); + + unsigned ExtOp = ISD::FP_EXTEND; + if (NVT.isInteger()) { + ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + } + + // Promote the comparison operands, if needed. + if (TLI.isCondCodeLegal(CCCode, CVT)) { + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + } else { + Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); + } + // Cast the true/false operands. + Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2)); + Tmp4 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(3)); + + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, NVT, {Tmp1, Tmp2, Tmp3, Tmp4, Cond}, + Node->getFlags()); + + // Cast the result back to the original type. + if (ExtOp != ISD::FP_EXTEND) + Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1); + else + Tmp1 = DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp1, + DAG.getIntPtrConstant(0, dl)); + + Results.push_back(Tmp1); + break; + } + case ISD::SETCC: case ISD::STRICT_FSETCC: case ISD::STRICT_FSETCCS: { diff --git a/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll b/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll index d26db2aefee..db70219f437 100644 --- a/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll +++ b/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll @@ -8,6 +8,7 @@ declare double @llvm.vector.reduce.fmax.v1f64(<1 x double> %a) declare fp128 @llvm.vector.reduce.fmax.v1f128(<1 x fp128> %a) declare half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a) +declare half @llvm.vector.reduce.fmax.v11f16(<11 x half> %a) declare float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a) declare fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a) declare float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a) @@ -104,6 +105,221 @@ define half @test_v4f16_ninf(<4 x half> %a) nounwind { ret half %b } +define half @test_v11f16(<11 x half> %a) nounwind { +; CHECK-NOFP-LABEL: test_v11f16: +; 
CHECK-NOFP: // %bb.0: +; CHECK-NOFP-NEXT: ldr h18, [sp, #8] +; CHECK-NOFP-NEXT: ldr h17, [sp] +; CHECK-NOFP-NEXT: ldr h16, [sp, #16] +; CHECK-NOFP-NEXT: fcvt s1, h1 +; CHECK-NOFP-NEXT: fcvt s18, h18 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcmp s1, s18 +; CHECK-NOFP-NEXT: fcvt s17, h17 +; CHECK-NOFP-NEXT: adrp x8, .LCPI6_0 +; CHECK-NOFP-NEXT: fcsel s1, s1, s18, gt +; CHECK-NOFP-NEXT: fcmp s0, s17 +; CHECK-NOFP-NEXT: ldr h18, [x8, :lo12:.LCPI6_0] +; CHECK-NOFP-NEXT: fcsel s0, s0, s17, gt +; CHECK-NOFP-NEXT: fcvt s2, h2 +; CHECK-NOFP-NEXT: fcvt s16, h16 +; CHECK-NOFP-NEXT: fcvt h1, s1 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fcmp s2, s16 +; CHECK-NOFP-NEXT: fcvt s1, h1 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcsel s2, s2, s16, gt +; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1 +; CHECK-NOFP-NEXT: fcvt s3, h3 +; CHECK-NOFP-NEXT: mov w8, #-8388608 +; CHECK-NOFP-NEXT: fcvt s18, h18 +; CHECK-NOFP-NEXT: fcvt h2, s2 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fmov s17, w8 +; CHECK-NOFP-NEXT: fcmp s3, s18 +; CHECK-NOFP-NEXT: fcvt s1, h2 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcsel s3, s3, s17, gt +; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1 +; CHECK-NOFP-NEXT: fcvt s4, h4 +; CHECK-NOFP-NEXT: fcvt h2, s3 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fcmp s4, s18 +; CHECK-NOFP-NEXT: fcvt s2, h2 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcsel s3, s4, s17, gt +; CHECK-NOFP-NEXT: fmaxnm s0, s0, s2 +; CHECK-NOFP-NEXT: fcvt s5, h5 +; CHECK-NOFP-NEXT: fcvt h3, s3 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fcmp s5, s18 +; CHECK-NOFP-NEXT: fcvt s3, h3 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcsel s4, s5, s17, gt +; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3 +; CHECK-NOFP-NEXT: fcvt s6, h6 +; CHECK-NOFP-NEXT: fcvt h4, s4 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fcmp s6, s18 +; CHECK-NOFP-NEXT: fcvt s1, h4 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcsel s5, s6, s17, gt +; CHECK-NOFP-NEXT: fmaxnm s0, 
s0, s1 +; CHECK-NOFP-NEXT: fcvt s7, h7 +; CHECK-NOFP-NEXT: fcvt h4, s5 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fcmp s7, s18 +; CHECK-NOFP-NEXT: fcvt s4, h4 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcsel s5, s7, s17, gt +; CHECK-NOFP-NEXT: fmaxnm s0, s0, s4 +; CHECK-NOFP-NEXT: fcvt h5, s5 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcvt s1, h5 +; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: ret +; +; CHECK-FP-LABEL: test_v11f16: +; CHECK-FP: // %bb.0: +; CHECK-FP-NEXT: movi v16.8h, #252, lsl #8 +; CHECK-FP-NEXT: mov x8, sp +; CHECK-FP-NEXT: ld1 { v16.h }[0], [x8] +; CHECK-FP-NEXT: add x8, sp, #8 // =8 +; CHECK-FP-NEXT: // kill: def $h0 killed $h0 def $q0 +; CHECK-FP-NEXT: // kill: def $h1 killed $h1 def $q1 +; CHECK-FP-NEXT: // kill: def $h2 killed $h2 def $q2 +; CHECK-FP-NEXT: // kill: def $h3 killed $h3 def $q3 +; CHECK-FP-NEXT: // kill: def $h4 killed $h4 def $q4 +; CHECK-FP-NEXT: // kill: def $h5 killed $h5 def $q5 +; CHECK-FP-NEXT: // kill: def $h6 killed $h6 def $q6 +; CHECK-FP-NEXT: // kill: def $h7 killed $h7 def $q7 +; CHECK-FP-NEXT: mov v0.h[1], v1.h[0] +; CHECK-FP-NEXT: ld1 { v16.h }[1], [x8] +; CHECK-FP-NEXT: mov v0.h[2], v2.h[0] +; CHECK-FP-NEXT: mov v0.h[3], v3.h[0] +; CHECK-FP-NEXT: add x8, sp, #16 // =16 +; CHECK-FP-NEXT: mov v0.h[4], v4.h[0] +; CHECK-FP-NEXT: ld1 { v16.h }[2], [x8] +; CHECK-FP-NEXT: mov v0.h[5], v5.h[0] +; CHECK-FP-NEXT: mov v0.h[6], v6.h[0] +; CHECK-FP-NEXT: mov v0.h[7], v7.h[0] +; CHECK-FP-NEXT: fmaxnm v0.8h, v0.8h, v16.8h +; CHECK-FP-NEXT: fmaxnmv h0, v0.8h +; CHECK-FP-NEXT: ret + %b = call nnan half @llvm.vector.reduce.fmax.v11f16(<11 x half> %a) + ret half %b +} + +define half @test_v11f16_ninf(<11 x half> %a) nounwind { +; CHECK-NOFP-LABEL: test_v11f16_ninf: +; CHECK-NOFP: // %bb.0: +; CHECK-NOFP-NEXT: ldr h18, [sp, #8] +; CHECK-NOFP-NEXT: ldr h17, [sp] +; CHECK-NOFP-NEXT: ldr h16, [sp, #16] +; CHECK-NOFP-NEXT: fcvt s1, h1 
+; CHECK-NOFP-NEXT: fcvt s18, h18 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcmp s1, s18 +; CHECK-NOFP-NEXT: fcvt s17, h17 +; CHECK-NOFP-NEXT: adrp x8, .LCPI7_0 +; CHECK-NOFP-NEXT: fcsel s1, s1, s18, gt +; CHECK-NOFP-NEXT: fcmp s0, s17 +; CHECK-NOFP-NEXT: ldr h18, [x8, :lo12:.LCPI7_0] +; CHECK-NOFP-NEXT: fcsel s0, s0, s17, gt +; CHECK-NOFP-NEXT: fcvt s2, h2 +; CHECK-NOFP-NEXT: fcvt s16, h16 +; CHECK-NOFP-NEXT: fcvt h1, s1 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fcmp s2, s16 +; CHECK-NOFP-NEXT: fcvt s1, h1 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: mov w8, #57344 +; CHECK-NOFP-NEXT: fcsel s2, s2, s16, gt +; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1 +; CHECK-NOFP-NEXT: fcvt s3, h3 +; CHECK-NOFP-NEXT: movk w8, #51071, lsl #16 +; CHECK-NOFP-NEXT: fcvt s18, h18 +; CHECK-NOFP-NEXT: fcvt h2, s2 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fmov s17, w8 +; CHECK-NOFP-NEXT: fcmp s3, s18 +; CHECK-NOFP-NEXT: fcvt s1, h2 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcsel s3, s3, s17, gt +; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1 +; CHECK-NOFP-NEXT: fcvt s4, h4 +; CHECK-NOFP-NEXT: fcvt h2, s3 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fcmp s4, s18 +; CHECK-NOFP-NEXT: fcvt s2, h2 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcsel s3, s4, s17, gt +; CHECK-NOFP-NEXT: fmaxnm s0, s0, s2 +; CHECK-NOFP-NEXT: fcvt s5, h5 +; CHECK-NOFP-NEXT: fcvt h3, s3 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fcmp s5, s18 +; CHECK-NOFP-NEXT: fcvt s3, h3 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcsel s4, s5, s17, gt +; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3 +; CHECK-NOFP-NEXT: fcvt s6, h6 +; CHECK-NOFP-NEXT: fcvt h4, s4 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fcmp s6, s18 +; CHECK-NOFP-NEXT: fcvt s1, h4 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcsel s5, s6, s17, gt +; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1 +; CHECK-NOFP-NEXT: fcvt s7, h7 +; CHECK-NOFP-NEXT: fcvt h4, s5 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: 
fcmp s7, s18 +; CHECK-NOFP-NEXT: fcvt s4, h4 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcsel s5, s7, s17, gt +; CHECK-NOFP-NEXT: fmaxnm s0, s0, s4 +; CHECK-NOFP-NEXT: fcvt h5, s5 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcvt s1, h5 +; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: ret +; +; CHECK-FP-LABEL: test_v11f16_ninf: +; CHECK-FP: // %bb.0: +; CHECK-FP-NEXT: mvni v16.8h, #4, lsl #8 +; CHECK-FP-NEXT: mov x8, sp +; CHECK-FP-NEXT: ld1 { v16.h }[0], [x8] +; CHECK-FP-NEXT: add x8, sp, #8 // =8 +; CHECK-FP-NEXT: // kill: def $h0 killed $h0 def $q0 +; CHECK-FP-NEXT: // kill: def $h1 killed $h1 def $q1 +; CHECK-FP-NEXT: // kill: def $h2 killed $h2 def $q2 +; CHECK-FP-NEXT: // kill: def $h3 killed $h3 def $q3 +; CHECK-FP-NEXT: // kill: def $h4 killed $h4 def $q4 +; CHECK-FP-NEXT: // kill: def $h5 killed $h5 def $q5 +; CHECK-FP-NEXT: // kill: def $h6 killed $h6 def $q6 +; CHECK-FP-NEXT: // kill: def $h7 killed $h7 def $q7 +; CHECK-FP-NEXT: mov v0.h[1], v1.h[0] +; CHECK-FP-NEXT: ld1 { v16.h }[1], [x8] +; CHECK-FP-NEXT: mov v0.h[2], v2.h[0] +; CHECK-FP-NEXT: mov v0.h[3], v3.h[0] +; CHECK-FP-NEXT: add x8, sp, #16 // =16 +; CHECK-FP-NEXT: mov v0.h[4], v4.h[0] +; CHECK-FP-NEXT: ld1 { v16.h }[2], [x8] +; CHECK-FP-NEXT: mov v0.h[5], v5.h[0] +; CHECK-FP-NEXT: mov v0.h[6], v6.h[0] +; CHECK-FP-NEXT: mov v0.h[7], v7.h[0] +; CHECK-FP-NEXT: fmaxnm v0.8h, v0.8h, v16.8h +; CHECK-FP-NEXT: fmaxnmv h0, v0.8h +; CHECK-FP-NEXT: ret + %b = call nnan ninf half @llvm.vector.reduce.fmax.v11f16(<11 x half> %a) + ret half %b +} + define float @test_v3f32(<3 x float> %a) nounwind { ; CHECK-LABEL: test_v3f32: ; CHECK: // %bb.0: diff --git a/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll b/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll index 52d6e9773ab..4925f049f95 100644 --- a/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll +++ b/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll @@ -8,6 
+8,7 @@ declare double @llvm.vector.reduce.fmin.v1f64(<1 x double> %a) declare fp128 @llvm.vector.reduce.fmin.v1f128(<1 x fp128> %a) declare half @llvm.vector.reduce.fmin.v4f16(<4 x half> %a) +declare half @llvm.vector.reduce.fmin.v11f16(<11 x half> %a) declare float @llvm.vector.reduce.fmin.v3f32(<3 x float> %a) declare fp128 @llvm.vector.reduce.fmin.v2f128(<2 x fp128> %a) declare float @llvm.vector.reduce.fmin.v16f32(<16 x float> %a) @@ -104,6 +105,221 @@ define half @test_v4f16_ninf(<4 x half> %a) nounwind { ret half %b } +define half @test_v11f16(<11 x half> %a) nounwind { +; CHECK-NOFP-LABEL: test_v11f16: +; CHECK-NOFP: // %bb.0: +; CHECK-NOFP-NEXT: ldr h18, [sp, #8] +; CHECK-NOFP-NEXT: ldr h17, [sp] +; CHECK-NOFP-NEXT: ldr h16, [sp, #16] +; CHECK-NOFP-NEXT: fcvt s1, h1 +; CHECK-NOFP-NEXT: fcvt s18, h18 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcmp s1, s18 +; CHECK-NOFP-NEXT: fcvt s17, h17 +; CHECK-NOFP-NEXT: adrp x8, .LCPI6_0 +; CHECK-NOFP-NEXT: fcsel s1, s1, s18, lt +; CHECK-NOFP-NEXT: fcmp s0, s17 +; CHECK-NOFP-NEXT: ldr h18, [x8, :lo12:.LCPI6_0] +; CHECK-NOFP-NEXT: fcsel s0, s0, s17, lt +; CHECK-NOFP-NEXT: fcvt s2, h2 +; CHECK-NOFP-NEXT: fcvt s16, h16 +; CHECK-NOFP-NEXT: fcvt h1, s1 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fcmp s2, s16 +; CHECK-NOFP-NEXT: fcvt s1, h1 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcsel s2, s2, s16, lt +; CHECK-NOFP-NEXT: fminnm s0, s0, s1 +; CHECK-NOFP-NEXT: fcvt s3, h3 +; CHECK-NOFP-NEXT: mov w8, #2139095040 +; CHECK-NOFP-NEXT: fcvt s18, h18 +; CHECK-NOFP-NEXT: fcvt h2, s2 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fmov s17, w8 +; CHECK-NOFP-NEXT: fcmp s3, s18 +; CHECK-NOFP-NEXT: fcvt s1, h2 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcsel s3, s3, s17, lt +; CHECK-NOFP-NEXT: fminnm s0, s0, s1 +; CHECK-NOFP-NEXT: fcvt s4, h4 +; CHECK-NOFP-NEXT: fcvt h2, s3 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fcmp s4, s18 +; CHECK-NOFP-NEXT: fcvt s2, h2 +; CHECK-NOFP-NEXT: fcvt 
s0, h0 +; CHECK-NOFP-NEXT: fcsel s3, s4, s17, lt +; CHECK-NOFP-NEXT: fminnm s0, s0, s2 +; CHECK-NOFP-NEXT: fcvt s5, h5 +; CHECK-NOFP-NEXT: fcvt h3, s3 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fcmp s5, s18 +; CHECK-NOFP-NEXT: fcvt s3, h3 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcsel s4, s5, s17, lt +; CHECK-NOFP-NEXT: fminnm s0, s0, s3 +; CHECK-NOFP-NEXT: fcvt s6, h6 +; CHECK-NOFP-NEXT: fcvt h4, s4 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fcmp s6, s18 +; CHECK-NOFP-NEXT: fcvt s1, h4 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcsel s5, s6, s17, lt +; CHECK-NOFP-NEXT: fminnm s0, s0, s1 +; CHECK-NOFP-NEXT: fcvt s7, h7 +; CHECK-NOFP-NEXT: fcvt h4, s5 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fcmp s7, s18 +; CHECK-NOFP-NEXT: fcvt s4, h4 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcsel s5, s7, s17, lt +; CHECK-NOFP-NEXT: fminnm s0, s0, s4 +; CHECK-NOFP-NEXT: fcvt h5, s5 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcvt s1, h5 +; CHECK-NOFP-NEXT: fminnm s0, s0, s1 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: ret +; +; CHECK-FP-LABEL: test_v11f16: +; CHECK-FP: // %bb.0: +; CHECK-FP-NEXT: movi v16.8h, #124, lsl #8 +; CHECK-FP-NEXT: mov x8, sp +; CHECK-FP-NEXT: ld1 { v16.h }[0], [x8] +; CHECK-FP-NEXT: add x8, sp, #8 // =8 +; CHECK-FP-NEXT: // kill: def $h0 killed $h0 def $q0 +; CHECK-FP-NEXT: // kill: def $h1 killed $h1 def $q1 +; CHECK-FP-NEXT: // kill: def $h2 killed $h2 def $q2 +; CHECK-FP-NEXT: // kill: def $h3 killed $h3 def $q3 +; CHECK-FP-NEXT: // kill: def $h4 killed $h4 def $q4 +; CHECK-FP-NEXT: // kill: def $h5 killed $h5 def $q5 +; CHECK-FP-NEXT: // kill: def $h6 killed $h6 def $q6 +; CHECK-FP-NEXT: // kill: def $h7 killed $h7 def $q7 +; CHECK-FP-NEXT: mov v0.h[1], v1.h[0] +; CHECK-FP-NEXT: ld1 { v16.h }[1], [x8] +; CHECK-FP-NEXT: mov v0.h[2], v2.h[0] +; CHECK-FP-NEXT: mov v0.h[3], v3.h[0] +; CHECK-FP-NEXT: add x8, sp, #16 // =16 +; CHECK-FP-NEXT: mov 
v0.h[4], v4.h[0] +; CHECK-FP-NEXT: ld1 { v16.h }[2], [x8] +; CHECK-FP-NEXT: mov v0.h[5], v5.h[0] +; CHECK-FP-NEXT: mov v0.h[6], v6.h[0] +; CHECK-FP-NEXT: mov v0.h[7], v7.h[0] +; CHECK-FP-NEXT: fminnm v0.8h, v0.8h, v16.8h +; CHECK-FP-NEXT: fminnmv h0, v0.8h +; CHECK-FP-NEXT: ret + %b = call nnan half @llvm.vector.reduce.fmin.v11f16(<11 x half> %a) + ret half %b +} + +define half @test_v11f16_ninf(<11 x half> %a) nounwind { +; CHECK-NOFP-LABEL: test_v11f16_ninf: +; CHECK-NOFP: // %bb.0: +; CHECK-NOFP-NEXT: ldr h18, [sp, #8] +; CHECK-NOFP-NEXT: ldr h17, [sp] +; CHECK-NOFP-NEXT: ldr h16, [sp, #16] +; CHECK-NOFP-NEXT: fcvt s1, h1 +; CHECK-NOFP-NEXT: fcvt s18, h18 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcmp s1, s18 +; CHECK-NOFP-NEXT: fcvt s17, h17 +; CHECK-NOFP-NEXT: adrp x8, .LCPI7_0 +; CHECK-NOFP-NEXT: fcsel s1, s1, s18, lt +; CHECK-NOFP-NEXT: fcmp s0, s17 +; CHECK-NOFP-NEXT: ldr h18, [x8, :lo12:.LCPI7_0] +; CHECK-NOFP-NEXT: fcsel s0, s0, s17, lt +; CHECK-NOFP-NEXT: fcvt s2, h2 +; CHECK-NOFP-NEXT: fcvt s16, h16 +; CHECK-NOFP-NEXT: fcvt h1, s1 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fcmp s2, s16 +; CHECK-NOFP-NEXT: fcvt s1, h1 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: mov w8, #57344 +; CHECK-NOFP-NEXT: fcsel s2, s2, s16, lt +; CHECK-NOFP-NEXT: fminnm s0, s0, s1 +; CHECK-NOFP-NEXT: fcvt s3, h3 +; CHECK-NOFP-NEXT: movk w8, #18303, lsl #16 +; CHECK-NOFP-NEXT: fcvt s18, h18 +; CHECK-NOFP-NEXT: fcvt h2, s2 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fmov s17, w8 +; CHECK-NOFP-NEXT: fcmp s3, s18 +; CHECK-NOFP-NEXT: fcvt s1, h2 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcsel s3, s3, s17, lt +; CHECK-NOFP-NEXT: fminnm s0, s0, s1 +; CHECK-NOFP-NEXT: fcvt s4, h4 +; CHECK-NOFP-NEXT: fcvt h2, s3 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fcmp s4, s18 +; CHECK-NOFP-NEXT: fcvt s2, h2 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcsel s3, s4, s17, lt +; CHECK-NOFP-NEXT: fminnm s0, s0, s2 +; CHECK-NOFP-NEXT: fcvt s5, 
h5 +; CHECK-NOFP-NEXT: fcvt h3, s3 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fcmp s5, s18 +; CHECK-NOFP-NEXT: fcvt s3, h3 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcsel s4, s5, s17, lt +; CHECK-NOFP-NEXT: fminnm s0, s0, s3 +; CHECK-NOFP-NEXT: fcvt s6, h6 +; CHECK-NOFP-NEXT: fcvt h4, s4 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fcmp s6, s18 +; CHECK-NOFP-NEXT: fcvt s1, h4 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcsel s5, s6, s17, lt +; CHECK-NOFP-NEXT: fminnm s0, s0, s1 +; CHECK-NOFP-NEXT: fcvt s7, h7 +; CHECK-NOFP-NEXT: fcvt h4, s5 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fcmp s7, s18 +; CHECK-NOFP-NEXT: fcvt s4, h4 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcsel s5, s7, s17, lt +; CHECK-NOFP-NEXT: fminnm s0, s0, s4 +; CHECK-NOFP-NEXT: fcvt h5, s5 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcvt s1, h5 +; CHECK-NOFP-NEXT: fminnm s0, s0, s1 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: ret +; +; CHECK-FP-LABEL: test_v11f16_ninf: +; CHECK-FP: // %bb.0: +; CHECK-FP-NEXT: mvni v16.8h, #132, lsl #8 +; CHECK-FP-NEXT: mov x8, sp +; CHECK-FP-NEXT: ld1 { v16.h }[0], [x8] +; CHECK-FP-NEXT: add x8, sp, #8 // =8 +; CHECK-FP-NEXT: // kill: def $h0 killed $h0 def $q0 +; CHECK-FP-NEXT: // kill: def $h1 killed $h1 def $q1 +; CHECK-FP-NEXT: // kill: def $h2 killed $h2 def $q2 +; CHECK-FP-NEXT: // kill: def $h3 killed $h3 def $q3 +; CHECK-FP-NEXT: // kill: def $h4 killed $h4 def $q4 +; CHECK-FP-NEXT: // kill: def $h5 killed $h5 def $q5 +; CHECK-FP-NEXT: // kill: def $h6 killed $h6 def $q6 +; CHECK-FP-NEXT: // kill: def $h7 killed $h7 def $q7 +; CHECK-FP-NEXT: mov v0.h[1], v1.h[0] +; CHECK-FP-NEXT: ld1 { v16.h }[1], [x8] +; CHECK-FP-NEXT: mov v0.h[2], v2.h[0] +; CHECK-FP-NEXT: mov v0.h[3], v3.h[0] +; CHECK-FP-NEXT: add x8, sp, #16 // =16 +; CHECK-FP-NEXT: mov v0.h[4], v4.h[0] +; CHECK-FP-NEXT: ld1 { v16.h }[2], [x8] +; CHECK-FP-NEXT: mov v0.h[5], v5.h[0] +; CHECK-FP-NEXT: mov 
v0.h[6], v6.h[0] +; CHECK-FP-NEXT: mov v0.h[7], v7.h[0] +; CHECK-FP-NEXT: fminnm v0.8h, v0.8h, v16.8h +; CHECK-FP-NEXT: fminnmv h0, v0.8h +; CHECK-FP-NEXT: ret + %b = call nnan ninf half @llvm.vector.reduce.fmin.v11f16(<11 x half> %a) + ret half %b +} + define float @test_v3f32(<3 x float> %a) nounwind { ; CHECK-LABEL: test_v3f32: ; CHECK: // %bb.0: