[X86] Make FeatureAVX512 imply FeatureFMA.

Previously our VEX patterns were checking Subtarget.hasFMA() which checked FMA || AVX512. So we were behaving as if AVX512 implied it anyway. Which means we'd allow VEX encoded 128/256 FMA when AVX512F was enabled but AVX512VL is off. Regardless of the FMA flag. EVEX to VEX also transforms scalar EVEX FMA instructions to their VEX versions even without the FMA flag. Similarly for 128/256 under AVX512VL. So this makes AVX512 imply FeatureFMA to make our current behavior explicit. All known CPUs that support AVX512 have VEX FMA instructions. llvm-svn: 317520
2024-10-19 11:02:59 +02:00 · 2017-11-06 22:49:01 +00:00 · 2017-11-06 22:49:01 +00:00 · 7cfd38c78b
commit 7cfd38c78b
parent 073f609e25
2 changed files with 5 additions and 5 deletions
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@ -116,9 +116,12 @@ def FeatureAVX     : SubtargetFeature<"avx", "X86SSELevel", "AVX",
 def FeatureAVX2    : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
                                      "Enable AVX2 instructions",
                                      [FeatureAVX]>;
+def FeatureFMA     : SubtargetFeature<"fma", "HasFMA", "true",
+                                      "Enable three-operand fused multiple-add",
+                                      [FeatureAVX]>;
 def FeatureAVX512   : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F",
                                      "Enable AVX-512 instructions",
-                                      [FeatureAVX2]>;
+                                      [FeatureAVX2, FeatureFMA]>;
 def FeatureERI      : SubtargetFeature<"avx512er", "HasERI", "true",
                      "Enable AVX-512 Exponential and Reciprocal Instructions",
                                      [FeatureAVX512]>;
@ -154,9 +157,6 @@ def FeaturePKU   : SubtargetFeature<"pku", "HasPKU", "true",
 def FeaturePCLMUL  : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
                         "Enable packed carry-less multiplication instructions",
                               [FeatureSSE2]>;
-def FeatureFMA     : SubtargetFeature<"fma", "HasFMA", "true",
-                                      "Enable three-operand fused multiple-add",
-                                      [FeatureAVX]>;
 def FeatureFMA4    : SubtargetFeature<"fma4", "HasFMA4", "true",
                                      "Enable four-operand fused multiple-add",
                                      [FeatureAVX, FeatureSSE4A]>;
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@ -463,7 +463,7 @@ public:
  bool hasPCLMUL() const { return HasPCLMUL; }
  // Prefer FMA4 to FMA - its better for commutation/memory folding and
  // has equal or better performance on all supported targets.
-  bool hasFMA() const { return (HasFMA || hasAVX512()) && !HasFMA4; }
+  bool hasFMA() const { return HasFMA && !HasFMA4; }
  bool hasFMA4() const { return HasFMA4; }
  bool hasAnyFMA() const { return hasFMA() || hasFMA4(); }
  bool hasXOP() const { return HasXOP; }