From 304779755da422b9df418bf0eaa9b5f3c5708cd4 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 24 Sep 2015 19:52:21 +0000 Subject: [PATCH] AMDGPU: Add cache invalidation instructions. These are necessary for implementing mem_fence for OpenCL 2.0. The VI assembler tests are disabled since the assembler seems to be using the wrong encoding or opcode. llvm-svn: 248532 --- include/llvm/IR/IntrinsicsAMDGPU.td | 18 ++++++++++++++ lib/Target/AMDGPU/CIInstructions.td | 8 +++++++ lib/Target/AMDGPU/SIInstrInfo.td | 17 +++++++++++++ lib/Target/AMDGPU/SIInstructions.td | 13 ++++++---- .../AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll | 16 +++++++++++++ .../AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll | 14 +++++++++++ .../AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll | 16 +++++++++++++ test/MC/AMDGPU/buffer_wbinv1l_vol_vi.s | 7 ++++++ test/MC/AMDGPU/mubuf.s | 24 +++++++++++++++++-- 9 files changed, 127 insertions(+), 6 deletions(-) create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll create mode 100644 test/MC/AMDGPU/buffer_wbinv1l_vol_vi.s diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td index 510e5ad2d9b..c197a663001 100644 --- a/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/include/llvm/IR/IntrinsicsAMDGPU.td @@ -83,3 +83,21 @@ def int_AMDGPU_read_workdim : AMDGPUReadPreloadRegisterIntrinsic < "__builtin_amdgpu_read_workdim">; } // End TargetPrefix = "AMDGPU" + +let TargetPrefix = "amdgcn" in { + +// SI only +def int_amdgcn_buffer_wbinvl1_sc : + GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_sc">, + Intrinsic<[], [], []>; + +// On CI+ +def int_amdgcn_buffer_wbinvl1_vol : + GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_vol">, + Intrinsic<[], [], []>; + +def int_amdgcn_buffer_wbinvl1 : + GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1">, + Intrinsic<[], [], []>; + +} diff --git a/lib/Target/AMDGPU/CIInstructions.td 
b/lib/Target/AMDGPU/CIInstructions.td index 9ec6fd12499..2bb740beebb 100644 --- a/lib/Target/AMDGPU/CIInstructions.td +++ b/lib/Target/AMDGPU/CIInstructions.td @@ -99,6 +99,14 @@ defm DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f // DS_CONDXCHG32_RTN_B64 // DS_CONDXCHG32_RTN_B128 +//===----------------------------------------------------------------------===// +// MUBUF Instructions +//===----------------------------------------------------------------------===// + +defm BUFFER_WBINVL1_VOL : MUBUF_Invalidate , + "buffer_wbinvl1_vol", int_amdgcn_buffer_wbinvl1_vol +>; + //===----------------------------------------------------------------------===// // Flat Instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td index 08237826e78..4f478104564 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.td +++ b/lib/Target/AMDGPU/SIInstrInfo.td @@ -2455,6 +2455,23 @@ multiclass MUBUF_Store_Helper { + let hasSideEffects = 1, mayStore = 1, AsmMatchConverter = "" in { + def "" : MUBUF_Pseudo ; + + // Set everything to 0. 
+ let offset = 0, offen = 0, idxen = 0, glc = 0, vaddr = 0, + vdata = 0, srsrc = 0, slc = 0, tfe = 0, soffset = 0 in { + let addr64 = 0 in { + def _si : MUBUF_Real_si ; + } + + def _vi : MUBUF_Real_vi ; + } + } // End hasSideEffects = 1, mayStore = 1, AsmMatchConverter = "" +} + class FLAT_Load_Helper op, string asm, RegisterClass regClass> : FLAT getGeneration() " ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">, AssemblerPredicate<"FeatureGCN">; def isSI : Predicate<"Subtarget->getGeneration() " - "== AMDGPUSubtarget::SOUTHERN_ISLANDS">; + "== AMDGPUSubtarget::SOUTHERN_ISLANDS">, + AssemblerPredicate<"FeatureSouthernIslands">; + def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">; def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">; @@ -1028,9 +1030,12 @@ defm BUFFER_ATOMIC_XOR : MUBUF_Atomic < //def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 , "buffer_atomic_fcmpswap_x2", []>; // isn't on VI //def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 , "buffer_atomic_fmin_x2", []>; // isn't on VI //def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 , "buffer_atomic_fmax_x2", []>; // isn't on VI -//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 , "buffer_wbinvl1_sc", []>; // isn't on CI & VI -//def BUFFER_WBINVL1_VOL : MUBUF_WBINVL1 , "buffer_wbinvl1_vol", []>; // isn't on SI -//def BUFFER_WBINVL1 : MUBUF_WBINVL1 , "buffer_wbinvl1", []>; + +let SubtargetPredicate = isSI in { +defm BUFFER_WBINVL1_SC : MUBUF_Invalidate , "buffer_wbinvl1_sc", int_amdgcn_buffer_wbinvl1_sc>; // isn't on CI & VI +} + +defm BUFFER_WBINVL1 : MUBUF_Invalidate , "buffer_wbinvl1", int_amdgcn_buffer_wbinvl1>; //===----------------------------------------------------------------------===// // MTBUF Instructions diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll new file mode 100644 index 00000000000..6d9db65e7d9 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll @@ -0,0 +1,16 @@ +; RUN: llc -march=amdgcn -mcpu=tahiti -show-mc-encoding < %s 
| FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s + +declare void @llvm.amdgcn.buffer.wbinvl1() #0 + +; GCN-LABEL: {{^}}test_buffer_wbinvl1: +; GCN-NEXT: ; BB#0: +; SI-NEXT: buffer_wbinvl1 ; encoding: [0x00,0x00,0xc4,0xe1,0x00,0x00,0x00,0x00] +; VI-NEXT: buffer_wbinvl1 ; encoding: [0x00,0x00,0xf8,0xe0,0x00,0x00,0x00,0x00] +; GCN-NEXT: s_endpgm +define void @test_buffer_wbinvl1() #0 { + call void @llvm.amdgcn.buffer.wbinvl1() + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll new file mode 100644 index 00000000000..746298465e5 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll @@ -0,0 +1,14 @@ +; RUN: llc -march=amdgcn -mcpu=tahiti -show-mc-encoding < %s | FileCheck -check-prefix=SI %s + +declare void @llvm.amdgcn.buffer.wbinvl1.sc() #0 + +; SI-LABEL: {{^}}test_buffer_wbinvl1_sc: +; SI-NEXT: ; BB#0: +; SI-NEXT: buffer_wbinvl1_sc ; encoding: [0x00,0x00,0xc0,0xe1,0x00,0x00,0x00,0x00] +; SI-NEXT: s_endpgm +define void @test_buffer_wbinvl1_sc() #0 { + call void @llvm.amdgcn.buffer.wbinvl1.sc() + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll new file mode 100644 index 00000000000..cecfcb1bfe7 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll @@ -0,0 +1,16 @@ +; RUN: llc -march=amdgcn -mcpu=bonaire -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -march=amdgcn -mcpu=tonga -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s + +declare void @llvm.amdgcn.buffer.wbinvl1.vol() #0 + +; GCN-LABEL: {{^}}test_buffer_wbinvl1_vol: +; GCN-NEXT: ; BB#0: +; CI-NEXT: buffer_wbinvl1_vol ; encoding: [0x00,0x00,0xc0,0xe1,0x00,0x00,0x00,0x00] +; 
VI-NEXT: buffer_wbinvl1_vol ; encoding: [0x00,0x00,0xfc,0xe0,0x00,0x00,0x00,0x00] +; GCN-NEXT: s_endpgm +define void @test_buffer_wbinvl1_vol() #0 { + call void @llvm.amdgcn.buffer.wbinvl1.vol() + ret void +} + +attributes #0 = { nounwind } diff --git a/test/MC/AMDGPU/buffer_wbinv1l_vol_vi.s b/test/MC/AMDGPU/buffer_wbinv1l_vol_vi.s new file mode 100644 index 00000000000..aa0a1ab8646 --- /dev/null +++ b/test/MC/AMDGPU/buffer_wbinv1l_vol_vi.s @@ -0,0 +1,7 @@ +// XFAIL: * +// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=VI %s + +; When assembled, this emits a different encoding value than codegen for the intrinsic + +buffer_wbinvl1_vol +// VI: buffer_wbinvl1_vol ; encoding: [0x00,0x00,0xfc,0xe0,0x00,0x00,0x00,0x00] diff --git a/test/MC/AMDGPU/mubuf.s b/test/MC/AMDGPU/mubuf.s index 6fee332d1d2..18cca702269 100644 --- a/test/MC/AMDGPU/mubuf.s +++ b/test/MC/AMDGPU/mubuf.s @@ -1,5 +1,9 @@ -// RUN: llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SICI %s -// RUN: llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=SICI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SICI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=SICI %s + +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefix=NOCI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOVI %s //===----------------------------------------------------------------------===// // Test for different operand combinations @@ -349,4 +353,20 @@ buffer_store_dwordx2 v[1:2], s[4:7], s1 buffer_store_dwordx4 v[1:4], s[4:7], s1 // SICI: buffer_store_dwordx4 v[1:4], 
s[4:7], s1 ; encoding: [0x00,0x00,0x78,0xe0,0x00,0x01,0x01,0x01] +//===----------------------------------------------------------------------===// +// Cache invalidation +//===----------------------------------------------------------------------===// + +buffer_wbinvl1 +// SICI: buffer_wbinvl1 ; encoding: [0x00,0x00,0xc4,0xe1,0x00,0x00,0x00,0x00] + +buffer_wbinvl1_sc +// SI: buffer_wbinvl1_sc ; encoding: [0x00,0x00,0xc0,0xe1,0x00,0x00,0x00,0x00] +// NOCI: error: instruction not supported on this GPU +// NOVI: error: instruction not supported on this GPU + +buffer_wbinvl1_vol +// CI: buffer_wbinvl1_vol ; encoding: [0x00,0x00,0xc0,0xe1,0x00,0x00,0x00,0x00] +// NOSI: error: instruction not supported on this GPU + // TODO: Atomics