AMDGPU: Add feature for fast f32 denormals

2025-01-31 12:41:49 +01:00 · 2018-08-15 22:45:04 +03:00 · 2018-08-15 22:45:04 +03:00 · f2d158d7a0
commit f2d158d7a0
parent 0e9a095779
3 changed files with 12 additions and 4 deletions
--- a/lib/Target/AMDGPU/AMDGPU.td
+++ b/lib/Target/AMDGPU/AMDGPU.td
@@ -33,6 +33,12 @@ def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
  "Assuming f32 fma is at least as fast as mul + add"
 >;

+def FeatureFastDenormalF32 : SubtargetFeature<"fast-denormal-f32",
+  "FastDenormalF32",
+  "true",
+  "Enabling denormals does not cause f32 instructions to run at f64 rates"
+>;
+
 def FeatureMIMG_R128 : SubtargetFeature<"mimg-r128",
  "MIMG_R128",
  "true",
@@ -632,7 +638,7 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
   FeatureScalarStores, FeatureInv2PiInlineImm,
   FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP,
   FeatureIntClamp, FeatureTrigReducedRange, FeatureDoesNotSupportSRAMECC,
-   FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts
+   FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts, FeatureFastDenormalF32
  ]
 >;

@@ -647,8 +653,8 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
   FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
   FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
   FeatureAddNoCarryInsts, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts,
-   FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16
-  ]
+   FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16,
+   FeatureFastDenormalF32]
 >;

 def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
@@ -665,7 +671,7 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
   FeatureNoSdstCMPX, FeatureVscnt, FeatureRegisterBanking,
   FeatureVOP3Literal, FeatureDPP8,
   FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC,
-   FeatureGFX10A16
+   FeatureGFX10A16, FeatureFastDenormalF32
  ]
 >;

--- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -178,6 +178,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
    MaxPrivateElementSize(0),

    FastFMAF32(false),
+    FastDenormalF32(false),
    HalfRate64Ops(false),

    FlatForGlobal(false),
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -291,6 +291,7 @@ protected:

  // Possibly statically set by tablegen, but may want to be overridden.
  bool FastFMAF32;
+  bool FastDenormalF32;
  bool HalfRate64Ops;

  // Dynamially set bits that enable features.