[AMDGPU] Support SCC on buffer atomics

Differential Revision: https://reviews.llvm.org/D98731
2024-11-26 12:43:36 +01:00 · 2021-03-16 11:57:45 -07:00 · 2021-03-16 11:57:45 -07:00 · d56387770d
commit d56387770d
parent 90896be0cc
4 changed files with 35 additions and 10 deletions
--- a/lib/Target/AMDGPU/BUFInstructions.td
+++ b/lib/Target/AMDGPU/BUFInstructions.td
@@ -679,7 +679,7 @@ class MUBUF_Atomic_Pseudo<string opName,
  let has_glc = 0;
  let has_dlc = 0;
  let has_tfe = 0;
-  let has_sccb = 0;
+  let has_sccb = 1;
  let maybeAtomic = 1;
  let AsmMatchConverter = "cvtMubufAtomic";
 }
@@ -2259,7 +2259,8 @@ defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x007>;
 // GFX8, GFX9 (VI).
 //===----------------------------------------------------------------------===//

-class MUBUF_Real_Base_vi <bits<7> op, MUBUF_Pseudo ps, int Enc> :
+class MUBUF_Real_Base_vi <bits<7> op, MUBUF_Pseudo ps, int Enc,
+                          bit has_sccb = ps.has_sccb> :
  MUBUF_Real<ps>,
  Enc64,
  SIMCInstr<ps.PseudoInstr, Enc>,
@@ -2270,7 +2271,7 @@ class MUBUF_Real_Base_vi <bits<7> op, MUBUF_Pseudo ps, int Enc> :
  let Inst{12}    = ps.offen;
  let Inst{13}    = ps.idxen;
  let Inst{14}    = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value);
-  let Inst{15}    = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccb_value);
+  let Inst{15}    = !if(has_sccb, cpol{CPolBit.SCC}, ps.sccb_value);
  let Inst{16}    = ps.lds;
  let Inst{17}    = !if(ps.has_slc, cpol{CPolBit.SLC}, ?);
  let Inst{24-18} = op;
@@ -2281,26 +2282,28 @@ class MUBUF_Real_Base_vi <bits<7> op, MUBUF_Pseudo ps, int Enc> :
  let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
 }

-class MUBUF_Real_vi <bits<7> op, MUBUF_Pseudo ps> :
-  MUBUF_Real_Base_vi<op, ps, SIEncodingFamily.VI> {
+class MUBUF_Real_vi <bits<7> op, MUBUF_Pseudo ps, bit has_sccb = ps.has_sccb> :
+  MUBUF_Real_Base_vi<op, ps, SIEncodingFamily.VI, has_sccb> {
  let AssemblerPredicate = isGFX8GFX9NotGFX90A;
  let DecoderNamespace = "GFX8";

  let Inst{55}    = !if(ps.has_tfe, tfe, ?);
 }

-class MUBUF_Real_gfx90a <bits<7> op, MUBUF_Pseudo ps> :
-  MUBUF_Real_Base_vi<op, ps, SIEncodingFamily.GFX90A> {
+class MUBUF_Real_gfx90a <bits<7> op, MUBUF_Pseudo ps,
+                         bit has_sccb = ps.has_sccb> :
+  MUBUF_Real_Base_vi<op, ps, SIEncodingFamily.GFX90A, has_sccb> {
  let AssemblerPredicate = isGFX90APlus;
  let DecoderNamespace = "GFX90A";
-  let AsmString = ps.Mnemonic # !subst("$tfe", "", ps.AsmOperands);
+  let AsmString = ps.Mnemonic # !subst("$sccb", !if(has_sccb, "$sccb",""),
+                                !subst("$tfe", "", ps.AsmOperands));

  let Inst{55}    = acc;
 }

 multiclass MUBUF_Real_vi_gfx90a<bits<7> op, MUBUF_Pseudo ps> {
  def _vi :     MUBUF_Real_vi<op, ps>;
-  def _gfx90a : MUBUF_Real_gfx90a<op, ps>;
+  def _gfx90a : MUBUF_Real_gfx90a<op, ps, !and(ps.has_sccb,!not(ps.FPAtomic))>;
 }

 multiclass MUBUF_Real_AllAddr_vi<bits<7> op> {
@@ -2483,7 +2486,7 @@ defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Real_Atomic_vi <0x4e>;

 } // End SubtargetPredicate = HasAtomicFaddInsts

-let SubtargetPredicate = isGFX90APlus, AssemblerPredicate = isGFX90APlus in {
+let SubtargetPredicate = isGFX90APlus in {
  defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Real_Atomic_vi<0x4f>;
  defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Real_Atomic_vi<0x50>;
  defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Real_Atomic_vi<0x51>;
--- a/test/MC/AMDGPU/gfx90a_asm_features.s
+++ b/test/MC/AMDGPU/gfx90a_asm_features.s
@@ -1046,3 +1046,7 @@ global_atomic_add_f32 v1, v0, v2, s[0:1] glc ; encoding: [0x00,0x80,0x35,0xdd,0x
 // GFX1010: error: instruction not supported on this GPU
 // GFX90A: global_atomic_pk_add_f16  v0, v[0:1], v2, off glc ; encoding: [0x00,0x80,0x39,0xdd,0x00,0x02,0x7f,0x00]
 global_atomic_pk_add_f16 v0, v[0:1], v2, off glc
+
+// NOT-GFX90A: error: scc modifier is not supported on this GPU
+// GFX90A: buffer_atomic_add v4, off, s[8:11], s3 scc ; encoding: [0x00,0x80,0x08,0xe1,0x00,0x04,0x02,0x03]
+buffer_atomic_add v4, off, s[8:11], s3 scc
--- a/test/MC/AMDGPU/gfx90a_err.s
+++ b/test/MC/AMDGPU/gfx90a_err.s
@@ -231,6 +231,21 @@ global_atomic_min_f64 v[0:1], v[2:3], off scc
 global_atomic_max_f64 v[0:1], v[2:3], off scc
 // GFX90A: error: instruction must not use scc

+buffer_atomic_add_f32 v4, off, s[8:11], s3 scc
+// GFX90A: error: instruction must not use scc
+
+buffer_atomic_pk_add_f16 v4, off, s[8:11], s3 scc
+// GFX90A: error: instruction must not use scc
+
+buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 scc
+// GFX90A: error: instruction must not use scc
+
+buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 scc
+// GFX90A: error: instruction must not use scc
+
+buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 scc
+// GFX90A: error: instruction must not use scc
+
 v_mov_b32_sdwa v1, src_lds_direct dst_sel:DWORD
 // GFX90A: error: lds_direct is not supported on this GPU

--- a/test/MC/Disassembler/AMDGPU/gfx90a_dasm_features.txt
+++ b/test/MC/Disassembler/AMDGPU/gfx90a_dasm_features.txt
@@ -793,3 +793,6 @@

 # GFX90A: flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] glc ; encoding: [0x00,0x00,0x41,0xdd,0x00,0x02,0x00,0x00]
 0x00,0x00,0x41,0xdd,0x00,0x02,0x00,0x00
+
+# GFX90A: buffer_atomic_add v4, off, s[8:11], s3 scc ; encoding: [0x00,0x80,0x08,0xe1,0x00,0x04,0x02,0x03]
+0x00,0x80,0x08,0xe1,0x00,0x04,0x02,0x03