[ARM][VecReduce] Force expand vector_reduce_fmin

Under MVE, we do not have any lowering for fminimum, which a vector_reduce_fmin without NoNan will be expanded into. As with the other recent patches, force this to expand in the pre-isel pass. Note that Neon lowering would be OK because the scalar fminimum uses the vector VMIN instruction, but is probably better to just rely on the scalar operations, which is what is done here. Also fixes what appears to be the reversal of INF vs -INF in the vector_reduce_fmin widening code.
2024-10-19 11:02:59 +02:00 · 2020-02-04 09:25:01 +00:00 · 2020-02-04 09:25:01 +00:00 · f48eb88663
commit f48eb88663
parent 293e799dfc
4 changed files with 2273 additions and 6 deletions
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@ -4678,11 +4678,11 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
    break;
  case ISD::VECREDUCE_FMAX:
    NeutralElem = DAG.getConstantFP(
-        std::numeric_limits<double>::infinity(), dl, ElemVT);
+        -std::numeric_limits<double>::infinity(), dl, ElemVT);
    break;
  case ISD::VECREDUCE_FMIN:
    NeutralElem = DAG.getConstantFP(
-        -std::numeric_limits<double>::infinity(), dl, ElemVT);
+        std::numeric_limits<double>::infinity(), dl, ElemVT);
    break;
  }

--- a/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/lib/Target/ARM/ARMTargetTransformInfo.h
@ -176,12 +176,15 @@ public:
      // We don't have legalization support for ordered FP reductions.
      if (!II->getFastMathFlags().allowReassoc())
        return true;
-      LLVM_FALLTHROUGH;
+      // Can't legalize reductions with soft floats.
+      return TLI->useSoftFloat() || !TLI->getSubtarget()->hasFPRegs();

    case Intrinsic::experimental_vector_reduce_fmin:
    case Intrinsic::experimental_vector_reduce_fmax:
-      // Can't legalize reductions with soft floats.
-      return TLI->useSoftFloat() || !TLI->getSubtarget()->hasFPRegs();
+      // Can't legalize reductions with soft floats, and NoNan will create
+      // fminimum which we do not know how to lower.
+      return TLI->useSoftFloat() || !TLI->getSubtarget()->hasFPRegs() ||
+             !II->getFastMathFlags().noNaNs();

    default:
      // Don't expand anything else, let legalization deal with it.
--- a/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
+++ b/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
@ -47,7 +47,7 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
 define float @test_v3f32(<3 x float> %a) nounwind {
 ; CHECK-LABEL: test_v3f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #2139095040
+; CHECK-NEXT:    mov w8, #-8388608
 ; CHECK-NEXT:    fmov s1, w8
 ; CHECK-NEXT:    mov v0.s[3], v1.s[0]
 ; CHECK-NEXT:    fmaxnmv s0, v0.4s
--- a/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll
+++ b/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll