[X86] Add a simple hack to IsProfitableToFold to prevent vselect+strict fp operations from being folded into masked instructions.

We really need to update the isel patterns to prevent this, but that requires some tablegen de-tangling. So this hack will work for correctness in the short term.
2025-02-01 13:11:39 +01:00 · 2019-12-18 14:24:20 -08:00 · 2019-12-18 14:24:20 -08:00 · 477cf157af
commit 477cf157af
parent 7e3bfeda71
2 changed files with 8 additions and 1 deletions
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@ -580,6 +580,12 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
  if (!N.hasOneUse())
    return false;

+  // FIXME: Temporary hack to prevent strict floating point nodes from
+  // folding into masked operations illegally.
+  if (U == Root && Root->getOpcode() == ISD::VSELECT &&
+      N.getOpcode() != ISD::LOAD && N.getOpcode() != X86ISD::VBROADCAST_LOAD)
+    return false;
+
  if (N.getOpcode() != ISD::LOAD)
    return true;

--- a/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ b/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@ -7317,7 +7317,8 @@ define <16 x float> @vpaddd_mask_test(<16 x float> %i, <16 x float> %j, <16 x i3
 ; AVX512-LABEL: vpaddd_mask_test:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vptestmd %zmm2, %zmm2, %k1
-; AVX512-NEXT:    vaddps %zmm1, %zmm0, %zmm0 {%k1}
+; AVX512-NEXT:    vaddps %zmm1, %zmm0, %zmm1
+; AVX512-NEXT:    vmovaps %zmm1, %zmm0 {%k1}
 ; AVX512-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %i, <16 x float> %j, metadata !"round.dynamic", metadata !"fpexcept.strict") #0