diff --git a/include/llvm/CodeGen/ScheduleHazardRecognizer.h b/include/llvm/CodeGen/ScheduleHazardRecognizer.h index 37590f496ca..53c5fc0edee 100644 --- a/include/llvm/CodeGen/ScheduleHazardRecognizer.h +++ b/include/llvm/CodeGen/ScheduleHazardRecognizer.h @@ -114,6 +114,14 @@ public: // Default implementation: count it as a cycle. AdvanceCycle(); } + + /// EmitNoops - This callback is invoked when noops were added to the + /// instruction stream. + virtual void EmitNoops(unsigned Quantity) { + // Default implementation: invoke EmitNoop() once per noop. + for (unsigned i = 0; i < Quantity; ++i) + EmitNoop(); + } }; } // end namespace llvm diff --git a/include/llvm/CodeGen/TargetInstrInfo.h b/include/llvm/CodeGen/TargetInstrInfo.h index f00741530b8..96cca025778 100644 --- a/include/llvm/CodeGen/TargetInstrInfo.h +++ b/include/llvm/CodeGen/TargetInstrInfo.h @@ -1343,6 +1343,11 @@ public: virtual void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const; + /// Insert noops into the instruction stream at the specified point. + virtual void insertNoops(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned Quantity) const; + /// Return the noop instruction to use for a noop. virtual void getNoop(MCInst &NopInst) const; diff --git a/lib/CodeGen/PostRAHazardRecognizer.cpp b/lib/CodeGen/PostRAHazardRecognizer.cpp index 4f88f4d3dd6..82ed386db82 100644 --- a/lib/CodeGen/PostRAHazardRecognizer.cpp +++ b/lib/CodeGen/PostRAHazardRecognizer.cpp @@ -82,11 +82,9 @@ bool PostRAHazardRecognizer::runOnMachineFunction(MachineFunction &Fn) { for (MachineInstr &MI : MBB) { // If we need to emit noops prior to this instruction, then do so. 
unsigned NumPreNoops = HazardRec->PreEmitNoops(&MI); - for (unsigned i = 0; i != NumPreNoops; ++i) { - HazardRec->EmitNoop(); - TII->insertNoop(MBB, MachineBasicBlock::iterator(MI)); - ++NumNoops; - } + HazardRec->EmitNoops(NumPreNoops); + TII->insertNoops(MBB, MachineBasicBlock::iterator(MI), NumPreNoops); + NumNoops += NumPreNoops; HazardRec->EmitInstruction(&MI); if (HazardRec->atIssueLimit()) { diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp index fe9feb5f116..7e8fe93eb8e 100644 --- a/lib/CodeGen/TargetInstrInfo.cpp +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -69,6 +69,15 @@ void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB, llvm_unreachable("Target didn't implement insertNoop!"); } +/// insertNoops - Insert \p Quantity noops into the instruction stream at the +/// specified point by calling insertNoop() once per noop. +void TargetInstrInfo::insertNoops(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned Quantity) const { + for (unsigned i = 0; i < Quantity; ++i) + insertNoop(MBB, MI); +} + static bool isAsmComment(const char *Str, const MCAsmInfo &MAI) { return strncmp(Str, MAI.getCommentString().data(), MAI.getCommentString().size()) == 0; diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 0c921bccc77..626b2358133 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -1047,9 +1047,6 @@ void GCNPassConfig::addPreEmitPass() { // // Here we add a stand-alone hazard recognizer pass which can handle all // cases. - // - // FIXME: This stand-alone pass will emit indiv. S_NOP 0, as needed. It would - // be better for it to emit S_NOP when possible. 
addPass(&PostRAHazardRecognizerID); addPass(&BranchRelaxationPassID); } diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index 77ed364dedc..dbd3d351729 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1533,25 +1533,24 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, .addMemOperand(MMO); } -void SIInstrInfo::insertWaitStates(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - int Count) const { - DebugLoc DL = MBB.findDebugLoc(MI); - while (Count > 0) { - int Arg; - if (Count >= 8) - Arg = 7; - else - Arg = Count - 1; - Count -= 8; - BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP)) - .addImm(Arg); - } -} - void SIInstrInfo::insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { - insertWaitStates(MBB, MI, 1); + insertNoops(MBB, MI, 1); +} + +void SIInstrInfo::insertNoops(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned Quantity) const { + DebugLoc DL = MBB.findDebugLoc(MI); + while (Quantity > 0) { + unsigned Arg; + if (Quantity >= 8) + Arg = 7; + else + Arg = Quantity - 1; + Quantity -= Arg + 1; + BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP)).addImm(Arg); + } } void SIInstrInfo::insertReturn(MachineBasicBlock &MBB) const { diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h index 08bf3d27c74..c77d3fb4342 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.h +++ b/lib/Target/AMDGPU/SIInstrInfo.h @@ -898,12 +898,12 @@ public: /// VALU if necessary. If present, \p MDT is updated. 
void moveToVALU(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const; - void insertWaitStates(MachineBasicBlock &MBB,MachineBasicBlock::iterator MI, - int Count) const; - void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override; + void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + unsigned Quantity) const override; + void insertReturn(MachineBasicBlock &MBB) const; /// Return the number of wait states that result from executing this /// instruction. diff --git a/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir b/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir index 08ac96ae719..a96490f3416 100644 --- a/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir +++ b/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir @@ -2,9 +2,7 @@ # GCN-LABEL: name: flat_atomic_fcmpswap_to_s_denorm_mode # GCN: FLAT_ATOMIC_FCMPSWAP -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: S_DENORM_MODE --- name: flat_atomic_fcmpswap_to_s_denorm_mode @@ -16,9 +14,7 @@ body: | # GCN-LABEL: name: flat_atomic_fcmpswap_x2_to_s_denorm_mode # GCN: FLAT_ATOMIC_FCMPSWAP_X2 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: S_DENORM_MODE --- name: flat_atomic_fcmpswap_x2_to_s_denorm_mode @@ -30,9 +26,7 @@ body: | # GCN-LABEL: name: flat_atomic_fmax_to_s_denorm_mode # GCN: FLAT_ATOMIC_FMAX -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: S_DENORM_MODE --- name: flat_atomic_fmax_to_s_denorm_mode @@ -44,9 +38,7 @@ body: | # GCN-LABEL: name: flat_atomic_fmax_x2_to_s_denorm_mode # GCN: FLAT_ATOMIC_FMAX_X2 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: S_DENORM_MODE --- name: flat_atomic_fmax_x2_to_s_denorm_mode @@ -58,9 +50,7 @@ body: | # GCN-LABEL: name: flat_atomic_fmin_to_s_denorm_mode # GCN: FLAT_ATOMIC_FMIN -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: 
S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: S_DENORM_MODE --- name: flat_atomic_fmin_to_s_denorm_mode @@ -72,9 +62,7 @@ body: | # GCN-LABEL: name: flat_atomic_fmin_x2_to_s_denorm_mode # GCN: FLAT_ATOMIC_FMIN_X2 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: S_DENORM_MODE --- name: flat_atomic_fmin_x2_to_s_denorm_mode @@ -86,9 +74,7 @@ body: | # GCN-LABEL: name: flat_atomic_fcmpswap_x2_rtn_to_s_denorm_mode # GCN: FLAT_ATOMIC_FCMPSWAP_X2_RTN -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: S_DENORM_MODE --- name: flat_atomic_fcmpswap_x2_rtn_to_s_denorm_mode @@ -100,9 +86,7 @@ body: | # GCN-LABEL: name: flat_atomic_fmax_rtn_to_s_denorm_mode # GCN: FLAT_ATOMIC_FMAX_RTN -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: S_DENORM_MODE --- name: flat_atomic_fmax_rtn_to_s_denorm_mode @@ -114,9 +98,7 @@ body: | # GCN-LABEL: name: flat_atomic_fmax_x2_rtn_to_s_denorm_mode # GCN: FLAT_ATOMIC_FMAX_X2_RTN -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: S_DENORM_MODE --- name: flat_atomic_fmax_x2_rtn_to_s_denorm_mode @@ -128,9 +110,7 @@ body: | # GCN-LABEL: name: flat_atomic_fmin_rtn_to_s_denorm_mode # GCN: FLAT_ATOMIC_FMIN_RTN -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: S_DENORM_MODE --- name: flat_atomic_fmin_rtn_to_s_denorm_mode @@ -142,9 +122,7 @@ body: | # GCN-LABEL: name: flat_atomic_fmin_x2_rtn_to_s_denorm_mode # GCN: FLAT_ATOMIC_FMIN_X2_RTN -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: S_DENORM_MODE --- name: flat_atomic_fmin_x2_rtn_to_s_denorm_mode @@ -156,9 +134,7 @@ body: | # GCN-LABEL: name: flat_atomic_fcmpswap_rtn_to_s_denorm_mode # GCN: FLAT_ATOMIC_FCMPSWAP_RTN -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: S_DENORM_MODE --- name: 
flat_atomic_fcmpswap_rtn_to_s_denorm_mode @@ -170,9 +146,7 @@ body: | # GCN-LABEL: name: global_atomic_fcmpswap_to_s_denorm_mode # GCN: GLOBAL_ATOMIC_FCMPSWAP -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: S_DENORM_MODE --- name: global_atomic_fcmpswap_to_s_denorm_mode @@ -184,9 +158,7 @@ body: | # GCN-LABEL: name: global_atomic_fcmpswap_x2_to_s_denorm_mode # GCN: GLOBAL_ATOMIC_FCMPSWAP_X2 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: S_DENORM_MODE --- name: global_atomic_fcmpswap_x2_to_s_denorm_mode @@ -198,9 +170,7 @@ body: | # GCN-LABEL: name: global_atomic_fmax_to_s_denorm_mode # GCN: GLOBAL_ATOMIC_FMAX -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: S_DENORM_MODE --- name: global_atomic_fmax_to_s_denorm_mode @@ -212,9 +182,7 @@ body: | # GCN-LABEL: name: global_atomic_fmax_x2_to_s_denorm_mode # GCN: GLOBAL_ATOMIC_FMAX_X2 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: S_DENORM_MODE --- name: global_atomic_fmax_x2_to_s_denorm_mode @@ -226,9 +194,7 @@ body: | # GCN-LABEL: name: global_atomic_fmin_to_s_denorm_mode # GCN: GLOBAL_ATOMIC_FMIN -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: S_DENORM_MODE --- name: global_atomic_fmin_to_s_denorm_mode @@ -240,9 +206,7 @@ body: | # GCN-LABEL: name: global_atomic_fmin_x2_to_s_denorm_mode # GCN: GLOBAL_ATOMIC_FMIN_X2 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: S_DENORM_MODE --- name: global_atomic_fmin_x2_to_s_denorm_mode @@ -254,9 +218,7 @@ body: | # GCN-LABEL: name: global_atomic_fcmpswap_rtn_to_s_denorm_mode # GCN: GLOBAL_ATOMIC_FCMPSWAP_RTN -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: S_DENORM_MODE --- name: global_atomic_fcmpswap_rtn_to_s_denorm_mode @@ -268,9 +230,7 @@ body: | 
# GCN-LABEL: name: global_atomic_fcmpswap_x2_rtn_to_s_denorm_mode # GCN: GLOBAL_ATOMIC_FCMPSWAP_X2_RTN -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: S_DENORM_MODE --- name: global_atomic_fcmpswap_x2_rtn_to_s_denorm_mode @@ -282,9 +242,7 @@ body: | # GCN-LABEL: name: global_atomic_fmax_rtn_to_s_denorm_mode # GCN: GLOBAL_ATOMIC_FMAX_RTN -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: S_DENORM_MODE --- name: global_atomic_fmax_rtn_to_s_denorm_mode @@ -296,9 +254,7 @@ body: | # GCN-LABEL: name: global_atomic_fmax_x2_rtn_to_s_denorm_mode # GCN: GLOBAL_ATOMIC_FMAX_X2_RTN -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: S_DENORM_MODE --- name: global_atomic_fmax_x2_rtn_to_s_denorm_mode @@ -310,9 +266,7 @@ body: | # GCN-LABEL: name: global_atomic_fmin_rtn_to_s_denorm_mode # GCN: GLOBAL_ATOMIC_FMIN_RTN -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: S_DENORM_MODE --- name: global_atomic_fmin_rtn_to_s_denorm_mode @@ -324,9 +278,7 @@ body: | # GCN-LABEL: name: global_atomic_fmin_x2_rtn_to_s_denorm_mode # GCN: GLOBAL_ATOMIC_FMIN_X2_RTN -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: S_DENORM_MODE --- name: global_atomic_fmin_x2_rtn_to_s_denorm_mode @@ -338,9 +290,7 @@ body: | # GCN-LABEL: name: global_atomic_fcmpswap_saddr_to_s_denorm_mode # GCN: GLOBAL_ATOMIC_FCMPSWAP_SADDR -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: S_DENORM_MODE --- name: global_atomic_fcmpswap_saddr_to_s_denorm_mode @@ -352,9 +302,7 @@ body: | # GCN-LABEL: name: global_atomic_fcmpswap_x2_saddr_rtn_to_s_denorm_mode # GCN: GLOBAL_ATOMIC_FCMPSWAP_X2_SADDR_RTN -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: S_DENORM_MODE --- name: 
global_atomic_fcmpswap_x2_saddr_rtn_to_s_denorm_mode @@ -366,9 +314,7 @@ body: | # GCN-LABEL: name: global_atomic_fmax_saddr_rtn_to_s_denorm_mode # GCN: GLOBAL_ATOMIC_FMAX_SADDR_RTN -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: S_DENORM_MODE --- name: global_atomic_fmax_saddr_rtn_to_s_denorm_mode @@ -380,9 +326,7 @@ body: | # GCN-LABEL: name: global_atomic_fmax_x2_saddr_rtn_to_s_denorm_mode # GCN: GLOBAL_ATOMIC_FMAX_X2_SADDR_RTN -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: S_DENORM_MODE --- name: global_atomic_fmax_x2_saddr_rtn_to_s_denorm_mode @@ -394,9 +338,7 @@ body: | # GCN-LABEL: name: global_atomic_fmin_saddr_rtn_to_s_denorm_mode # GCN: GLOBAL_ATOMIC_FMIN_SADDR_RTN -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: S_DENORM_MODE --- name: global_atomic_fmin_saddr_rtn_to_s_denorm_mode @@ -408,9 +350,7 @@ body: | # GCN-LABEL: name: global_atomic_fmin_x2_saddr_rtn_to_s_denorm_mode # GCN: GLOBAL_ATOMIC_FMIN_X2_SADDR_RTN -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: S_DENORM_MODE --- name: global_atomic_fmin_x2_saddr_rtn_to_s_denorm_mode diff --git a/test/CodeGen/AMDGPU/frem.ll b/test/CodeGen/AMDGPU/frem.ll index 9287fae037b..c50ffcfba3e 100644 --- a/test/CodeGen/AMDGPU/frem.ll +++ b/test/CodeGen/AMDGPU/frem.ll @@ -612,8 +612,7 @@ define amdgpu_kernel void @frem_f64(double addrspace(1)* %out, double addrspace( ; SI-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 ; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v9 ; SI-NEXT: s_xor_b64 vcc, s[0:1], vcc -; SI-NEXT: s_nop 0 -; SI-NEXT: s_nop 0 +; SI-NEXT: s_nop 1 ; SI-NEXT: v_div_fmas_f64 v[4:5], v[12:13], v[6:7], v[10:11] ; SI-NEXT: v_div_fixup_f64 v[4:5], v[4:5], v[2:3], v[0:1] ; SI-NEXT: v_bfe_u32 v6, v5, 20, 11 @@ -740,8 +739,7 @@ define amdgpu_kernel void @fast_frem_f64(double addrspace(1)* %out, double addrs ; SI-NEXT: v_cmp_eq_u32_e32 vcc, 
v3, v5 ; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v9 ; SI-NEXT: s_xor_b64 vcc, s[0:1], vcc -; SI-NEXT: s_nop 0 -; SI-NEXT: s_nop 0 +; SI-NEXT: s_nop 1 ; SI-NEXT: v_div_fmas_f64 v[4:5], v[12:13], v[6:7], v[10:11] ; SI-NEXT: v_div_fixup_f64 v[4:5], v[4:5], v[2:3], v[0:1] ; SI-NEXT: v_bfe_u32 v6, v5, 20, 11 @@ -1842,8 +1840,7 @@ define amdgpu_kernel void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x doub ; SI-NEXT: v_cmp_eq_u32_e32 vcc, v7, v9 ; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], v3, v13 ; SI-NEXT: s_xor_b64 vcc, s[0:1], vcc -; SI-NEXT: s_nop 0 -; SI-NEXT: s_nop 0 +; SI-NEXT: s_nop 1 ; SI-NEXT: v_div_fmas_f64 v[8:9], v[16:17], v[10:11], v[14:15] ; SI-NEXT: v_div_fixup_f64 v[8:9], v[8:9], v[6:7], v[2:3] ; SI-NEXT: v_bfe_u32 v10, v9, 20, 11 @@ -1876,8 +1873,7 @@ define amdgpu_kernel void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x doub ; SI-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 ; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v11 ; SI-NEXT: s_xor_b64 vcc, s[0:1], vcc -; SI-NEXT: s_nop 0 -; SI-NEXT: s_nop 0 +; SI-NEXT: s_nop 1 ; SI-NEXT: v_div_fmas_f64 v[6:7], v[14:15], v[8:9], v[12:13] ; SI-NEXT: v_div_fixup_f64 v[6:7], v[6:7], v[4:5], v[0:1] ; SI-NEXT: v_bfe_u32 v8, v7, 20, 11 diff --git a/test/CodeGen/AMDGPU/hazard-hidden-bundle.mir b/test/CodeGen/AMDGPU/hazard-hidden-bundle.mir index d5d2512795b..b02b6b0664b 100644 --- a/test/CodeGen/AMDGPU/hazard-hidden-bundle.mir +++ b/test/CodeGen/AMDGPU/hazard-hidden-bundle.mir @@ -40,10 +40,7 @@ body: | # GCN-LABEL: name: vmem_vcc_hazard_ignore_bundle_instr # GCN: S_LOAD_DWORDX2_IMM # GCN-NEXT: } -# GCN-NEXT: S_NOP -# GCN-NEXT: S_NOP -# GCN-NEXT: S_NOP -# GCN-NEXT: S_NOP +# GCN-NEXT: S_NOP 3 # GCN: BUFFER_LOAD_DWORD_OFFEN --- name: vmem_vcc_hazard_ignore_bundle_instr @@ -63,11 +60,7 @@ body: | # GCN-LABEL: name: vmem_vcc_min_of_two_after_bundle # GCN: bb.2: -# GCN-NEXT: S_NOP -# GCN-NEXT: S_NOP -# GCN-NEXT: S_NOP -# GCN-NEXT: S_NOP -# GCN-NEXT: S_NOP +# GCN-NEXT: S_NOP 4 # GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN --- name: 
vmem_vcc_min_of_two_after_bundle diff --git a/test/CodeGen/AMDGPU/inserted-wait-states.mir b/test/CodeGen/AMDGPU/inserted-wait-states.mir index 1922adf5ee6..2a1442bb0a5 100644 --- a/test/CodeGen/AMDGPU/inserted-wait-states.mir +++ b/test/CodeGen/AMDGPU/inserted-wait-states.mir @@ -1,5 +1,5 @@ -# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN -# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,SICI +# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,SICI # RUN: llc -march=amdgcn -mcpu=fiji -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI,GFX9 @@ -24,26 +24,17 @@ # GCN-LABEL: bb.1: # GCN: V_CMP_EQ_I32 -# GCN: S_NOP -# GCN: S_NOP -# GCN: S_NOP -# GCN: S_NOP +# GCN: S_NOP 3 # GCN: V_DIV_FMAS_F32 # GCN-LABEL: bb.2: # GCN: V_CMP_EQ_I32 -# GCN: S_NOP -# GCN: S_NOP -# GCN: S_NOP -# GCN: S_NOP +# GCN: S_NOP 3 # GCN: V_DIV_FMAS_F32 # GCN-LABEL: bb.3: # GCN: V_DIV_SCALE_F32 -# GCN: S_NOP -# GCN: S_NOP -# GCN: S_NOP -# GCN: S_NOP +# GCN: S_NOP 3 # GCN: V_DIV_FMAS_F32 name: div_fmas @@ -76,14 +67,12 @@ body: | # GCN-LABEL: bb.0: # GCN: S_SETREG -# GCN: S_NOP 0 -# GCN: S_NOP 0 +# GCN: S_NOP 1 # GCN: S_GETREG # GCN-LABEL: bb.1: # GCN: S_SETREG_IMM32 -# GCN: S_NOP 0 -# GCN: S_NOP 0 +# GCN: S_NOP 1 # GCN: S_GETREG # GCN-LABEL: bb.2: @@ -126,15 +115,15 @@ body: | # GCN-LABEL: bb.0: # GCN: S_SETREG -# GCN: S_NOP 0 -# VI: S_NOP 0 -# GCN-NEXT: S_SETREG +# SICI: S_NOP 0 +# VI: S_NOP 1 +# GCN: S_SETREG # GCN-LABEL: bb.1: # GCN: S_SETREG -# GCN: S_NOP 0 -# VI: S_NOP 0 -# GCN-NEXT: S_SETREG +# SICI: S_NOP 0 +# VI: S_NOP 1 +# GCN: S_SETREG # 
GCN-LABEL: bb.2: # GCN: S_SETREG @@ -239,34 +228,22 @@ body: | # GCN-LABEL: bb.0: # GCN: V_ADD_CO_U32 -# GCN: S_NOP -# GCN: S_NOP -# GCN: S_NOP -# GCN: S_NOP +# GCN: S_NOP 3 # GCN: V_READLANE_B32 # GCN-LABEL: bb.1: # GCN: V_ADD_CO_U32 -# GCN: S_NOP -# GCN: S_NOP -# GCN: S_NOP -# GCN: S_NOP +# GCN: S_NOP 3 # GCN: V_WRITELANE_B32 # GCN-LABEL: bb.2: # GCN: V_ADD_CO_U32 -# GCN: S_NOP -# GCN: S_NOP -# GCN: S_NOP -# GCN: S_NOP +# GCN: S_NOP 3 # GCN: V_READLANE_B32 # GCN-LABEL: bb.3: # GCN: V_ADD_CO_U32 -# GCN: S_NOP -# GCN: S_NOP -# GCN: S_NOP -# GCN: S_NOP +# GCN: S_NOP 3 # GCN: V_WRITELANE_B32 name: readwrite_lane @@ -429,17 +406,12 @@ body: | # VI-LABEL: bb.0: # VI: V_MOV_B32_e32 -# VI-NEXT: S_NOP 0 -# VI-NEXT: S_NOP 0 +# VI-NEXT: S_NOP 1 # VI-NEXT: V_MOV_B32_dpp # VI-LABEL: bb.1: # VI: V_CMPX_EQ_I32_e32 -# VI-NEXT: S_NOP 0 -# VI-NEXT: S_NOP 0 -# VI-NEXT: S_NOP 0 -# VI-NEXT: S_NOP 0 -# VI-NEXT: S_NOP 0 +# VI-NEXT: S_NOP 4 # VI-NEXT: V_MOV_B32_dpp name: dpp diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll index 31a54f1e0ff..1420b513b03 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll @@ -7,9 +7,7 @@ ; VI-LABEL: {{^}}dpp_test: ; VI: v_mov_b32_e32 v0, s{{[0-9]+}} ; VI-NOOPT: v_mov_b32_e32 v1, s{{[0-9]+}} -; PREGFX10-OPT: s_nop 1 -; PREGFX10-NOOPT: s_nop 0 -; PREGFX10-NOOPT: s_nop 0 +; PREGFX10: s_nop 1 ; VI-OPT: v_mov_b32_dpp v0, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0x11] ; VI-NOOPT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x01,0x08,0x11] define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in) { @@ -21,14 +19,10 @@ define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in) { ; VI-LABEL: {{^}}dpp_wait_states: ; VI-NOOPT: v_mov_b32_e32 [[VGPR1:v[0-9]+]], s{{[0-9]+}} ; VI: v_mov_b32_e32 
[[VGPR0:v[0-9]+]], s{{[0-9]+}} -; PREGFX10-OPT: s_nop 1 -; PREGFX10-NOOPT: s_nop 0 -; PREGFX10-NOOPT: s_nop 0 +; PREGFX10: s_nop 1 ; VI-OPT: v_mov_b32_dpp [[VGPR0]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; VI-NOOPT: v_mov_b32_dpp [[VGPR1]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl: -; PREGFX10-OPT: s_nop 1 -; PREGFX10-NOOPT: s_nop 0 -; PREGFX10-NOOPT: s_nop 0 +; PREGFX10: s_nop 1 ; VI-OPT: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; VI-NOOPT: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR1]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 define amdgpu_kernel void @dpp_wait_states(i32 addrspace(1)* %out, i32 %in) { @@ -44,13 +38,10 @@ define amdgpu_kernel void @dpp_wait_states(i32 addrspace(1)* %out, i32 %in) { ; PREGFX10-OPT: s_mov_b32 ; PREGFX10-NOOPT: s_waitcnt ; PREGFX10-NOOPT: v_mov_b32_e32 -; PREGFX10-NOOPT-NEXT: s_nop 0 ; VI: v_mov_b32_dpp [[VGPR0:v[0-9]+]], v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 -; PREGFX10-OPT: s_nop 1 +; PREGFX10: s_nop 1 ; VI: v_mov_b32_dpp [[VGPR1:v[0-9]+]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 -; PREGFX10-OPT: s_nop 1 -; PREGFX10-NOOPT: s_nop 0 -; PREGFX10-NOOPT: s_nop 0 +; PREGFX10: s_nop 1 ; VI: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR1]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 define amdgpu_kernel void @dpp_first_in_bb(float addrspace(1)* %out, float addrspace(1)* %in, float %cond, float %a, float %b) { %cmp = fcmp oeq float %cond, 0.0 diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll index 52562cc73e6..1d16b9ca3e4 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll @@ -5,9 +5,7 @@ ; GCN-LABEL: {{^}}dpp_test: ; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}} ; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}} -; GFX8-OPT: s_nop 1 -; 
GFX8-NOOPT: s_nop 0 -; GFX8-NOOPT-NEXT: s_nop 0 +; GFX8: s_nop 1 ; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}} define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2) { %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 0) #0 @@ -18,9 +16,7 @@ define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2) ; GCN-LABEL: {{^}}dpp_test_bc: ; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}} ; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}} -; GFX8-OPT: s_nop 1 -; GFX8-NOOPT: s_nop 0 -; GFX8-NOOPT-NEXT: s_nop 0 +; GFX8: s_nop 1 ; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[2,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0{{$}} define amdgpu_kernel void @dpp_test_bc(i32 addrspace(1)* %out, i32 %in1, i32 %in2) { %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 2, i32 1, i32 1, i1 1) #0 @@ -34,8 +30,9 @@ define amdgpu_kernel void @dpp_test_bc(i32 addrspace(1)* %out, i32 %in1, i32 %in ; GFX8-OPT: v_add_u32_e32 [[REG:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}} ; GFX8-NOOPT: v_add_u32_e64 [[REG:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{[0-9]+}} ; GFX8-NOOPT: v_mov_b32_e32 v{{[0-9]+}}, 0 -; GFX8: s_nop 0 -; GFX8-NEXT: s_nop 0 +; GFX8-NOOPT: s_nop 1 +; GFX8-OPT: s_nop 0 +; GFX8-OPT-NEXT: s_nop 0 ; GFX8-NEXT: v_mov_b32_dpp {{v[0-9]+}}, [[REG]] quad_perm:[1,0,3,2] row_mask:0xf bank_mask:0xf @0 = internal unnamed_addr addrspace(3) global [448 x i32] undef, align 4 define weak_odr amdgpu_kernel void @dpp_test1(i32* %arg) local_unnamed_addr { diff --git a/test/CodeGen/AMDGPU/mai-hazards.mir b/test/CodeGen/AMDGPU/mai-hazards.mir index b0906f6018d..4a10af28984 100644 --- a/test/CodeGen/AMDGPU/mai-hazards.mir +++ b/test/CodeGen/AMDGPU/mai-hazards.mir @@ -3,8 +3,7 @@ # GCN-LABEL: name: valu_write_vgpr_mfma_read # GCN: V_MOV_B32 # GCN: V_MOV_B32 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: 
valu_write_vgpr_mfma_read body: | @@ -17,8 +16,7 @@ body: | # GCN-LABEL: name: valu_write_vgpr_accvgpr_write_read # GCN: V_MOV_B32 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 1 # GCN-NEXT: V_ACCVGPR_WRITE_B32 name: valu_write_vgpr_accvgpr_write_read body: | @@ -41,8 +39,7 @@ body: | # GCN-LABEL: name: mfma_write_agpr_mfma_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: mfma_write_agpr_mfma_read_overlap body: | @@ -54,8 +51,7 @@ body: | # GCN-LABEL: name: mfma_write_agpr_mfma_read_partial # GCN: V_MFMA -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: mfma_write_agpr_mfma_read_partial body: | @@ -67,10 +63,7 @@ body: | # GCN-LABEL: name: mfma_write_agpr_mfma_srca_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 3 # GCN-NEXT: V_MFMA name: mfma_write_agpr_mfma_srca_read_overlap body: | @@ -82,10 +75,7 @@ body: | # GCN-LABEL: name: mfma_write_agpr_mfma_srcb_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 3 # GCN-NEXT: V_MFMA name: mfma_write_agpr_mfma_srcb_read_overlap body: | @@ -97,10 +87,7 @@ body: | # GCN-LABEL: name: mfma_4x4_write_agpr_accvgpr_read # GCN: V_MFMA_F32_4X4X1F32 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 3 # GCN-NEXT: V_ACCVGPR_READ_B32 name: mfma_4x4_write_agpr_accvgpr_read body: | @@ -112,16 +99,8 @@ body: | # GCN-LABEL: name: mfma_16x16_write_agpr_accvgpr_read # GCN: V_MFMA_F32_16X16X1F32 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 1 # GCN-NEXT: V_ACCVGPR_READ_B32 name: mfma_16x16_write_agpr_accvgpr_read 
body: | @@ -133,24 +112,9 @@ body: | # GCN-LABEL: name: mfma_32x32_write_agpr_accvgpr_read # GCN: V_MFMA_F32_32X32X2F32 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 1 # GCN-NEXT: V_ACCVGPR_READ_B32 name: mfma_32x32_write_agpr_accvgpr_read body: | @@ -174,13 +138,7 @@ body: | # GCN-LABEL: name: mfma_16x16_write_agpr_accvgpr_write # GCN: V_MFMA_F32_16X16X1F32 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 6 # GCN-NEXT: V_ACCVGPR_WRITE_B32 name: mfma_16x16_write_agpr_accvgpr_write body: | @@ -192,21 +150,8 @@ body: | # GCN-LABEL: name: mfma_32x32_write_agpr_accvgpr_write # GCN: V_MFMA_F32_32X32X2F32 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 6 # GCN-NEXT: V_ACCVGPR_WRITE_B32 name: mfma_32x32_write_agpr_accvgpr_write body: | @@ -229,11 +174,7 @@ body: | # GCN-LABEL: name: mfma_16x16_read_srcc_accvgpr_write # GCN: V_MFMA_F32_16X16X1F32 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 4 # GCN-NEXT: V_ACCVGPR_WRITE_B32 name: mfma_16x16_read_srcc_accvgpr_write body: | @@ -245,19 +186,8 @@ body: | # GCN-LABEL: name: mfma_32x32_read_srcc_accvgpr_write # GCN: V_MFMA_F32_32X32X2F32 -# GCN-NEXT: 
S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 4 # GCN-NEXT: V_ACCVGPR_WRITE_B32 name: mfma_32x32_read_srcc_accvgpr_write body: | @@ -280,8 +210,7 @@ body: | # GCN-LABEL: name: accvgpr_read_write_vgpr_mfma_read # GCN: V_ACCVGPR_READ_B32 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: accvgpr_read_write_vgpr_mfma_read body: | @@ -293,8 +222,7 @@ body: | # GCN-LABEL: name: accvgpr_read_write_vgpr_accvgpr_write_read # GCN: V_ACCVGPR_READ_B32 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 1 # GCN-NEXT: V_ACCVGPR_WRITE_B32 name: accvgpr_read_write_vgpr_accvgpr_write_read body: | @@ -318,9 +246,7 @@ body: | # GCN-LABEL: name: accvgpr_write_agpr_mfma_read_srca # GCN: V_ACCVGPR_WRITE_B32 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: V_MFMA name: accvgpr_write_agpr_mfma_read_srca body: | @@ -332,9 +258,7 @@ body: | # GCN-LABEL: name: accvgpr_write_agpr_mfma_read_srcb # GCN: V_ACCVGPR_WRITE_B32 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: V_MFMA name: accvgpr_write_agpr_mfma_read_srcb body: | @@ -346,9 +270,7 @@ body: | # GCN-LABEL: name: accvgpr_write_agpr_accvgpr_read # GCN: V_ACCVGPR_WRITE_B32 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: V_ACCVGPR_READ_B32 name: accvgpr_write_agpr_accvgpr_read body: | @@ -360,10 +282,7 @@ body: | # GCN-LABEL: name: vcmpx_write_exec_mfma # GCN: V_CMPX_EQ_I32_e32 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 3 # GCN-NEXT: V_MFMA name: vcmpx_write_exec_mfma body: | @@ -375,10 +294,7 @@ body: | # GCN-LABEL: name: 
vcmpx_write_exec_accvgpr_write # GCN: V_CMPX_EQ_I32_e32 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 3 # GCN-NEXT: V_ACCVGPR_WRITE_B32 name: vcmpx_write_exec_accvgpr_write body: | @@ -390,8 +306,7 @@ body: | # GCN-LABEL: name: accvgpr_read_write_vgpr_load # GCN: V_ACCVGPR_READ_B32 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 1 # GCN-NEXT: FLAT_LOAD_DWORD name: accvgpr_read_write_vgpr_load body: | @@ -403,8 +318,7 @@ body: | # GCN-LABEL: name: accvgpr_read_write_vgpr_ds_permute # GCN: V_ACCVGPR_READ_B32 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 1 # GCN-NEXT: DS_PERMUTE_B32 name: accvgpr_read_write_vgpr_ds_permute body: | @@ -416,8 +330,7 @@ body: | # GCN-LABEL: name: accvgpr_read_write_vgpr_flat_load # GCN: V_ACCVGPR_READ_B32 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 1 # GCN-NEXT: FLAT_LOAD_DWORD name: accvgpr_read_write_vgpr_flat_load body: | @@ -429,8 +342,7 @@ body: | # GCN-LABEL: name: accvgpr_read_write_vgpr_buffer_store # GCN: V_ACCVGPR_READ_B32 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 1 # GCN-NEXT: BUFFER_STORE_DWORD_OFFSET name: accvgpr_read_write_vgpr_buffer_store body: | @@ -442,8 +354,7 @@ body: | # GCN-LABEL: name: accvgpr_read_write_vgpr_store # GCN: V_ACCVGPR_READ_B32 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 1 # GCN-NEXT: DS_WRITE_B32 name: accvgpr_read_write_vgpr_store body: | @@ -497,8 +408,7 @@ body: | # GCN-LABEL: name: valu_write_vgpr_accvgpr_read_load_2_and_3_depend # GCN: V_MOV_B32 # GCN-NEXT: V_ACCVGPR_READ_B32 -# GCN-NEXT: S_NOP 0 -# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 1 # GCN-NEXT: FLAT_LOAD_DWORD name: valu_write_vgpr_accvgpr_read_load_2_and_3_depend body: | diff --git a/test/CodeGen/AMDGPU/vmem-vcc-hazard.mir b/test/CodeGen/AMDGPU/vmem-vcc-hazard.mir index 5dbe5d58d9b..2efbe582fbb 100644 --- a/test/CodeGen/AMDGPU/vmem-vcc-hazard.mir +++ b/test/CodeGen/AMDGPU/vmem-vcc-hazard.mir @@ -2,11 
+2,7 @@ # GCN-LABEL: name: vmem_vcc_fallthrough # GCN: bb.1: -# GCN-NEXT: S_NOP -# GCN-NEXT: S_NOP -# GCN-NEXT: S_NOP -# GCN-NEXT: S_NOP -# GCN-NEXT: S_NOP +# GCN-NEXT: S_NOP 4 # GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN --- name: vmem_vcc_fallthrough @@ -23,10 +19,7 @@ body: | ... # GCN-LABEL: name: vmem_vcc_branch_to_next # GCN: bb.1: -# GCN-NEXT: S_NOP -# GCN-NEXT: S_NOP -# GCN-NEXT: S_NOP -# GCN-NEXT: S_NOP +# GCN-NEXT: S_NOP 3 # GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN --- name: vmem_vcc_branch_to_next @@ -82,10 +75,7 @@ body: | ... # GCN-LABEL: name: vmem_vcc_branch_around # GCN: bb.2: -# GCN-NEXT: S_NOP -# GCN-NEXT: S_NOP -# GCN-NEXT: S_NOP -# GCN-NEXT: S_NOP +# GCN-NEXT: S_NOP 3 # GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN --- name: vmem_vcc_branch_around @@ -110,10 +100,7 @@ body: | $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, implicit $exec ... # GCN-LABEL: name: vmem_vcc_branch_backedge -# GCN: S_NOP -# GCN-NEXT: S_NOP -# GCN-NEXT: S_NOP -# GCN-NEXT: S_NOP +# GCN: S_NOP 3 # GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN --- name: vmem_vcc_branch_backedge @@ -132,11 +119,7 @@ body: | ... # GCN-LABEL: name: vmem_vcc_min_of_two # GCN: bb.2: -# GCN-NEXT: S_NOP -# GCN-NEXT: S_NOP -# GCN-NEXT: S_NOP -# GCN-NEXT: S_NOP -# GCN-NEXT: S_NOP +# GCN-NEXT: S_NOP 4 # GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN --- name: vmem_vcc_min_of_two @@ -159,10 +142,7 @@ body: | $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, implicit $exec ... 
# GCN-LABEL: name: vmem_vcc_self_loop -# GCN: S_NOP -# GCN-NEXT: S_NOP -# GCN-NEXT: S_NOP -# GCN-NEXT: S_NOP +# GCN: S_NOP 3 # GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN --- name: vmem_vcc_self_loop @@ -179,10 +159,7 @@ body: | # GCN-LABEL: name: vmem_vcc_min_of_two_self_loop1 # GCN: bb.1: # GCN: $sgpr0 = S_MOV_B32 0 -# GCN-NEXT: S_NOP -# GCN-NEXT: S_NOP -# GCN-NEXT: S_NOP -# GCN-NEXT: S_NOP +# GCN-NEXT: S_NOP 3 # GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN --- name: vmem_vcc_min_of_two_self_loop1 @@ -205,9 +182,7 @@ body: | # GCN-LABEL: name: vmem_vcc_min_of_two_self_loop2 # GCN: bb.1: # GCN: $sgpr0 = S_MOV_B32 0 -# GCN-NEXT: S_NOP -# GCN-NEXT: S_NOP -# GCN-NEXT: S_NOP +# GCN-NEXT: S_NOP 2 # GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN --- name: vmem_vcc_min_of_two_self_loop2