From c63f0c139eaf244af22734d42eafba55822b669a Mon Sep 17 00:00:00 2001
From: Piotr Sobczak
Date: Mon, 14 Jun 2021 12:17:35 +0200
Subject: [PATCH] [AMDGPU] Limit runs of fixLdsBranchVmemWARHazard

The code in fixLdsBranchVmemWARHazard looks for patterns of a vmem/lds
access followed by a branch, followed by an lds/vmem access.

Handling the hazard requires processing an arbitrary number of
instructions. In the worst case, where a function has a vmem access
but no lds accesses, all instructions are examined only to conclude
that the hazard cannot occur.

Add a pre-processing stage that detects whether both lds and vmem are
present in the function and only then performs the more costly search.

This patch significantly improves compilation time in the cases where
the hazard cannot happen. In one pathological case I looked at,
IsHazardInst is needlessly called 88.6 million times.

The numbers could also be improved by introducing a map around the
inner calls to ::getWaitStatesSince in fixLdsBranchVmemWARHazard, but
nothing will beat not running fixLdsBranchVmemWARHazard at all in the
cases detected by shouldRunLdsBranchVmemWARHazardFixup().

Differential Revision: https://reviews.llvm.org/D104219
---
 lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 29 ++++++++++++++++++++++-
 lib/Target/AMDGPU/GCNHazardRecognizer.h   |  1 +
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 348947c6870..bc2fb1e9770 100644
--- a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -23,6 +23,9 @@ using namespace llvm;
 // Hazard Recoginizer Implementation
 //===----------------------------------------------------------------------===//
 
+static bool shouldRunLdsBranchVmemWARHazardFixup(const MachineFunction &MF,
+                                                 const GCNSubtarget &ST);
+
 GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
   IsHazardRecognizerMode(false),
   CurrCycleInstr(nullptr),
@@ -34,6 +37,7 @@ GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
   ClauseDefs(TRI.getNumRegUnits()) {
   MaxLookAhead = MF.getRegInfo().isPhysRegUsed(AMDGPU::AGPR0) ? 19 : 5;
   TSchedModel.init(&ST);
+  RunLdsBranchVmemWARHazardFixup = shouldRunLdsBranchVmemWARHazardFixup(MF, ST);
 }
 
 void GCNHazardRecognizer::Reset() {
@@ -1074,10 +1078,33 @@ bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) {
   return true;
 }
 
-bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
+static bool shouldRunLdsBranchVmemWARHazardFixup(const MachineFunction &MF,
+                                                 const GCNSubtarget &ST) {
   if (!ST.hasLdsBranchVmemWARHazard())
     return false;
 
+  // Check if the necessary condition for the hazard is met: both LDS and VMEM
+  // instructions need to appear in the same function.
+  bool HasLds = false;
+  bool HasVmem = false;
+  for (auto &MBB : MF) {
+    for (auto &MI : MBB) {
+      HasLds |= SIInstrInfo::isDS(MI);
+      HasVmem |=
+          SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI);
+      if (HasLds && HasVmem)
+        return true;
+    }
+  }
+  return false;
+}
+
+bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
+  if (!RunLdsBranchVmemWARHazardFixup)
+    return false;
+
+  assert(ST.hasLdsBranchVmemWARHazard());
+
   auto IsHazardInst = [](const MachineInstr &MI) {
     if (SIInstrInfo::isDS(MI))
       return 1;
diff --git a/lib/Target/AMDGPU/GCNHazardRecognizer.h b/lib/Target/AMDGPU/GCNHazardRecognizer.h
index 5970dbc0292..162121c2c52 100644
--- a/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -48,6 +48,7 @@ private:
   const SIInstrInfo &TII;
   const SIRegisterInfo &TRI;
   TargetSchedModel TSchedModel;
+  bool RunLdsBranchVmemWARHazardFixup;
 
   /// RegUnits of uses in the current soft memory clause.
   BitVector ClauseUses;
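
For illustration only: a minimal, self-contained C++ sketch of the early-exit
presence scan that shouldRunLdsBranchVmemWARHazardFixup() performs. The Inst
struct and its IsLds/IsVmem flags are hypothetical stand-ins for MachineInstr
and the SIInstrInfo::isDS / isVMEM / isSegmentSpecificFLAT queries used in the
patch; only the scanning pattern itself mirrors the real code.

// Hypothetical stand-ins for MachineInstr and the SIInstrInfo queries.
#include <cstdio>
#include <vector>

struct Inst {
  bool IsLds;
  bool IsVmem;
};

using Block = std::vector<Inst>;

// Returns true as soon as both an LDS and a VMEM instruction have been seen,
// so the costly hazard search only runs when the hazard is possible at all.
static bool hasBothLdsAndVmem(const std::vector<Block> &Func) {
  bool HasLds = false;
  bool HasVmem = false;
  for (const Block &B : Func) {
    for (const Inst &I : B) {
      HasLds |= I.IsLds;
      HasVmem |= I.IsVmem;
      if (HasLds && HasVmem)
        return true; // Early exit: no need to scan the rest of the function.
    }
  }
  return false; // Hazard impossible; skip the expensive fixup entirely.
}

int main() {
  // A function with VMEM accesses but no LDS accesses: the scan returns false
  // and the costly hazard search would be skipped.
  std::vector<Block> VmemOnly = {{{false, true}, {false, true}}};
  // A function containing both kinds: the scan exits early with true.
  std::vector<Block> Mixed = {{{true, false}}, {{false, true}}};
  std::printf("VmemOnly: %d, Mixed: %d\n", hasBothLdsAndVmem(VmemOnly),
              hasBothLdsAndVmem(Mixed));
  return 0;
}

Compiled standalone, this prints "VmemOnly: 0, Mixed: 1", matching the intent
described in the commit message: the expensive search is only entered when
both instruction kinds are present in the function.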