From 28876d6a1585c7dba9534a454dc7c8968c6cc1fa Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 12 Jan 2020 19:04:24 -0500 Subject: [PATCH] AMDGPU/GlobalISel: Select DS GWS intrinsics --- .../AMDGPU/AMDGPUInstructionSelector.cpp | 116 ++++++++++++++++++ lib/Target/AMDGPU/AMDGPUInstructionSelector.h | 1 + .../GlobalISel/llvm.amdgcn.ds.gws.barrier.ll | 9 ++ .../GlobalISel/llvm.amdgcn.ds.gws.init.ll | 5 + .../GlobalISel/llvm.amdgcn.ds.gws.sema.br.ll | 5 + .../llvm.amdgcn.ds.gws.sema.release.all.ll | 8 ++ .../GlobalISel/llvm.amdgcn.ds.gws.sema.v.ll | 5 + .../AMDGPU/llvm.amdgcn.ds.gws.barrier.ll | 37 ++++-- .../CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll | 36 ++++-- 9 files changed, 202 insertions(+), 20 deletions(-) create mode 100644 test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.barrier.ll create mode 100644 test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.init.ll create mode 100644 test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.br.ll create mode 100644 test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.release.all.ll create mode 100644 test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.v.ll diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 873ac9bb0ba..5ab8d2d8479 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1088,6 +1088,115 @@ bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic( return Ret; } +static unsigned gwsIntrinToOpcode(unsigned IntrID) { + switch (IntrID) { + case Intrinsic::amdgcn_ds_gws_init: + return AMDGPU::DS_GWS_INIT; + case Intrinsic::amdgcn_ds_gws_barrier: + return AMDGPU::DS_GWS_BARRIER; + case Intrinsic::amdgcn_ds_gws_sema_v: + return AMDGPU::DS_GWS_SEMA_V; + case Intrinsic::amdgcn_ds_gws_sema_br: + return AMDGPU::DS_GWS_SEMA_BR; + case Intrinsic::amdgcn_ds_gws_sema_p: + return AMDGPU::DS_GWS_SEMA_P; + case Intrinsic::amdgcn_ds_gws_sema_release_all: + return
AMDGPU::DS_GWS_SEMA_RELEASE_ALL; + default: + llvm_unreachable("not a gws intrinsic"); + } +} + +bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI, + Intrinsic::ID IID) const { + if (IID == Intrinsic::amdgcn_ds_gws_sema_release_all && + !STI.hasGWSSemaReleaseAll()) + return false; + + // Operands: intrinsic ID, vsrc (only in the 3-operand form), offset + const bool HasVSrc = MI.getNumOperands() == 3; + assert(HasVSrc || MI.getNumOperands() == 2); + + Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg(); + const RegisterBank *OffsetRB = RBI.getRegBank(BaseOffset, *MRI, TRI); + if (OffsetRB->getID() != AMDGPU::SGPRRegBankID) + return false; + + MachineInstr *OffsetDef = getDefIgnoringCopies(BaseOffset, *MRI); + assert(OffsetDef); + + unsigned ImmOffset; + + MachineBasicBlock *MBB = MI.getParent(); + const DebugLoc &DL = MI.getDebugLoc(); + + MachineInstr *Readfirstlane = nullptr; + + // If we legalized the VGPR input, strip out the readfirstlane to analyze the + // incoming offset, in case there's an add of a constant. We'll have to put it + // back later. + if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) { + Readfirstlane = OffsetDef; + BaseOffset = OffsetDef->getOperand(1).getReg(); + OffsetDef = getDefIgnoringCopies(BaseOffset, *MRI); + } + + if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) { + // If we have a constant offset, try to use the 0 in m0 as the base. + // TODO: Look into changing the default m0 initialization value. If the + // default -1 only set the low 16-bits, we could leave it as-is and add 1 to + // the immediate offset. + + ImmOffset = OffsetDef->getOperand(1).getCImm()->getZExtValue(); + BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0) + .addImm(0); + } else { + std::tie(BaseOffset, ImmOffset, OffsetDef) + = AMDGPU::getBaseWithConstantOffset(*MRI, BaseOffset); + + if (Readfirstlane) { + // We have the constant offset now, so put the readfirstlane back on the + // variable component.
+ if (!RBI.constrainGenericRegister(BaseOffset, AMDGPU::VGPR_32RegClass, *MRI)) + return false; + + Readfirstlane->getOperand(1).setReg(BaseOffset); + BaseOffset = Readfirstlane->getOperand(0).getReg(); + } else { + if (!RBI.constrainGenericRegister(BaseOffset, + AMDGPU::SReg_32RegClass, *MRI)) + return false; + } + + Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); + BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_LSHL_B32), M0Base) + .addReg(BaseOffset) + .addImm(16); + + BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0) + .addReg(M0Base); + } + + // The resource id offset is computed as (<isa opaque base> + M0[21:16] + + // offset field) % 64. Some versions of the programming guide omit the m0 + // part, or claim it's from offset 0. + auto MIB = BuildMI(*MBB, &MI, DL, TII.get(gwsIntrinToOpcode(IID))); + + if (HasVSrc) { + Register VSrc = MI.getOperand(1).getReg(); + MIB.addReg(VSrc); + if (!RBI.constrainGenericRegister(VSrc, AMDGPU::VGPR_32RegClass, *MRI)) + return false; + } + + MIB.addImm(ImmOffset) + .addImm(-1) // $gds + .cloneMemRefs(MI); + + MI.eraseFromParent(); + return true; +} + bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); @@ -1114,6 +1223,13 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( case Intrinsic::amdgcn_ds_ordered_add: case Intrinsic::amdgcn_ds_ordered_swap: return selectDSOrderedIntrinsic(I, IntrinsicID); + case Intrinsic::amdgcn_ds_gws_init: + case Intrinsic::amdgcn_ds_gws_barrier: + case Intrinsic::amdgcn_ds_gws_sema_v: + case Intrinsic::amdgcn_ds_gws_sema_br: + case Intrinsic::amdgcn_ds_gws_sema_p: + case Intrinsic::amdgcn_ds_gws_sema_release_all: + return selectDSGWSIntrinsic(I, IntrinsicID); default: return selectImpl(I, *CoverageInfo); } diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 38ca7fd4104..6f5268629be 100644 ---
a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -101,6 +101,7 @@ private: bool selectStoreIntrinsic(MachineInstr &MI, bool IsFormat) const; bool selectDSOrderedIntrinsic(MachineInstr &MI, Intrinsic::ID IID) const; + bool selectDSGWSIntrinsic(MachineInstr &MI, Intrinsic::ID IID) const; bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I) const; int getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) const; diff --git a/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.barrier.ll b/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.barrier.ll new file mode 100644 index 00000000000..17293926f03 --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.barrier.ll @@ -0,0 +1,9 @@ +; XUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.barrier.ll +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.barrier.ll +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.barrier.ll +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL %S/../llvm.amdgcn.ds.gws.barrier.ll +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL,GFX10 %S/../llvm.amdgcn.ds.gws.barrier.ll + +; Make sure the op is emitted bundled with a waitcnt with and without the retry 
loop, and the bundle is not removed by ExpandPostRAPseudos. +; XUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -stop-after=postrapseudos -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefix=MIR %S/../llvm.amdgcn.ds.gws.barrier.ll +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=postrapseudos -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefix=MIR %S/../llvm.amdgcn.ds.gws.barrier.ll diff --git a/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.init.ll b/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.init.ll new file mode 100644 index 00000000000..942c991bbd4 --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.init.ll @@ -0,0 +1,5 @@ +; XUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.init.ll +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.init.ll +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.init.ll +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL %S/../llvm.amdgcn.ds.gws.init.ll +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL %S/../llvm.amdgcn.ds.gws.init.ll diff --git 
a/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.br.ll b/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.br.ll new file mode 100644 index 00000000000..a3908c51688 --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.br.ll @@ -0,0 +1,5 @@ +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.br.ll +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.br.ll +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.br.ll +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %S/../llvm.amdgcn.ds.gws.sema.br.ll +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,GFX10 %S/../llvm.amdgcn.ds.gws.sema.br.ll diff --git a/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.release.all.ll b/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.release.all.ll new file mode 100644 index 00000000000..550a7312cbe --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.release.all.ll @@ -0,0 +1,8 @@ +; RUN: not llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - < %S/../llvm.amdgcn.ds.gws.sema.release.all.ll 2>&1 | FileCheck -enable-var-scope -check-prefix=GFX6ERR-GISEL %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < 
%S/../llvm.amdgcn.ds.gws.sema.release.all.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.release.all.ll +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.release.all.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP,GFX8 %S/../llvm.amdgcn.ds.gws.sema.release.all.ll +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.release.all.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %S/../llvm.amdgcn.ds.gws.sema.release.all.ll +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.release.all.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,GFX10 %S/../llvm.amdgcn.ds.gws.sema.release.all.ll + +; GFX6ERR-GISEL: LLVM ERROR: cannot select: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.release.all), %{{[0-9]+}}:sgpr(s32) :: (store 4 into custom "GWSResource") (in function: gws_sema_release_all_offset0) + diff --git a/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.v.ll b/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.v.ll new file mode 100644 index 00000000000..fe38b56ea25 --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.v.ll @@ -0,0 +1,5 @@ +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.v.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.v.ll +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.v.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.v.ll +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.v.ll | FileCheck -enable-var-scope 
-check-prefixes=GCN,LOOP,GFX8 %S/../llvm.amdgcn.ds.gws.sema.v.ll +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.v.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %S/../llvm.amdgcn.ds.gws.sema.v.ll +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.v.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,GFX10 %S/../llvm.amdgcn.ds.gws.sema.v.ll diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll index a781fb50060..ba8acd8a922 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll @@ -1,8 +1,8 @@ ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,GFX10 %s +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG,GFX10 %s ; Make sure the op is emitted bundled with a waitcnt with and without the retry loop, and the bundle is not removed by 
ExpandPostRAPseudos. ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -stop-after=postrapseudos -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=MIR %s @@ -51,8 +51,13 @@ define amdgpu_kernel void @gws_barrier_offset63(i32 %val) #0 { ; FIXME: Should be able to shift directly into m0 ; GCN-LABEL: {{^}}gws_barrier_sgpr_offset: ; NOLOOP-DAG: s_load_dwordx2 s{{\[}}[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]{{\]}} -; NOLOOP-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16 -; NOLOOP-DAG: s_mov_b32 m0, [[SHL]]{{$}} + +; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16 +; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}} + +; NOLOOP-GISEL-DAG: s_lshl_b32 m0, s[[OFFSET]], 16 + + ; NOLOOP-DAG: v_mov_b32_e32 [[GWS_VAL:v[0-9]+]], s[[BAR_NUM]] ; NOLOOP: ds_gws_barrier [[GWS_VAL]] gds{{$}} define amdgpu_kernel void @gws_barrier_sgpr_offset(i32 %val, i32 %offset) #0 { @@ -63,8 +68,12 @@ define amdgpu_kernel void @gws_barrier_sgpr_offset(i32 %val, i32 %offset) #0 { ; Variable offset in SGPR with constant add ; GCN-LABEL: {{^}}gws_barrier_sgpr_offset_add1: ; NOLOOP-DAG: s_load_dwordx2 s{{\[}}[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]{{\]}} -; NOLOOP-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16 -; NOLOOP-DAG: s_mov_b32 m0, [[SHL]]{{$}} + +; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16 +; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}} + +; NOLOOP-GISEL-DAG: s_lshl_b32 m0, s[[OFFSET]], 16 + ; NOLOOP-DAG: v_mov_b32_e32 [[GWS_VAL:v[0-9]+]], s[[BAR_NUM]] ; NOLOOP: ds_gws_barrier [[GWS_VAL]] offset:1 gds{{$}} define amdgpu_kernel void @gws_barrier_sgpr_offset_add1(i32 %val, i32 %offset.base) #0 { @@ -76,8 +85,12 @@ define amdgpu_kernel void @gws_barrier_sgpr_offset_add1(i32 %val, i32 %offset.ba ; GCN-LABEL: {{^}}gws_barrier_vgpr_offset: ; NOLOOP-DAG: s_load_dword [[BAR_NUM:s[0-9]+]] ; NOLOOP-DAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0 -; NOLOOP-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16 -; NOLOOP-DAG: s_mov_b32 m0, [[SHL]]{{$}} + +; 
NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16 +; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}} + +; NOLOOP-GISEL-DAG: s_lshl_b32 m0, [[READLANE]], 16 + ; NOLOOP-DAG: v_mov_b32_e32 [[GWS_VAL:v[0-9]+]], [[BAR_NUM]] ; NOLOOP: ds_gws_barrier [[GWS_VAL]] gds{{$}} define amdgpu_kernel void @gws_barrier_vgpr_offset(i32 %val) #0 { @@ -90,8 +103,12 @@ define amdgpu_kernel void @gws_barrier_vgpr_offset(i32 %val) #0 { ; GCN-LABEL: {{^}}gws_barrier_vgpr_offset_add: ; NOLOOP-DAG: s_load_dword [[BAR_NUM:s[0-9]+]] ; NOLOOP-DAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0 -; NOLOOP-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16 -; NOLOOP-DAG: s_mov_b32 m0, [[SHL]]{{$}} + +; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16 +; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}} + +; NOLOOP-GISEL-DAG: s_lshl_b32 m0, [[READLANE]], 16 + ; NOLOOP-DAG: v_mov_b32_e32 [[GWS_VAL:v[0-9]+]], [[BAR_NUM]] ; NOLOOP: ds_gws_barrier [[GWS_VAL]] offset:3 gds{{$}} define amdgpu_kernel void @gws_barrier_vgpr_offset_add(i32 %val) #0 { diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll index aa490f15d97..fe289fe06f7 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll @@ -1,8 +1,8 @@ ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck 
-enable-var-scope -check-prefixes=GCN,NOLOOP %s +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s ; Minimum offset ; GCN-LABEL: {{^}}gws_init_offset0: @@ -47,8 +47,12 @@ define amdgpu_kernel void @gws_init_offset63(i32 %val) #0 { ; FIXME: Should be able to shift directly into m0 ; GCN-LABEL: {{^}}gws_init_sgpr_offset: ; NOLOOP-DAG: s_load_dwordx2 s{{\[}}[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]{{\]}} -; NOLOOP-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16 -; NOLOOP-DAG: s_mov_b32 m0, [[SHL]]{{$}} + +; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16 +; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}} + +; NOLOOP-GISEL-DAG: s_lshl_b32 m0, s[[OFFSET]], 16 + ; NOLOOP-DAG: v_mov_b32_e32 [[GWS_VAL:v[0-9]+]], s[[BAR_NUM]] ; NOLOOP: ds_gws_init [[GWS_VAL]] gds{{$}} define amdgpu_kernel void @gws_init_sgpr_offset(i32 %val, i32 %offset) #0 { @@ -59,8 +63,12 @@ define amdgpu_kernel void @gws_init_sgpr_offset(i32 %val, i32 %offset) #0 { ; Variable offset in SGPR with constant add ; GCN-LABEL: {{^}}gws_init_sgpr_offset_add1: ; NOLOOP-DAG: s_load_dwordx2 s{{\[}}[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]{{\]}} -; NOLOOP-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16 -; NOLOOP-DAG: s_mov_b32 m0, [[SHL]]{{$}} + +; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16 +; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}} + +; NOLOOP-GISEL-DAG: s_lshl_b32 m0, s[[OFFSET]], 16 + ; NOLOOP-DAG: v_mov_b32_e32 [[GWS_VAL:v[0-9]+]], s[[BAR_NUM]] ; NOLOOP: ds_gws_init [[GWS_VAL]] offset:1 gds{{$}} define amdgpu_kernel void @gws_init_sgpr_offset_add1(i32 %val, i32 %offset.base) #0 { @@ -72,8 +80,12 @@ define amdgpu_kernel void @gws_init_sgpr_offset_add1(i32 %val, i32 %offset.base) ; GCN-LABEL: 
{{^}}gws_init_vgpr_offset: ; NOLOOP-DAG: s_load_dword [[BAR_NUM:s[0-9]+]] ; NOLOOP-DAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0 -; NOLOOP-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16 -; NOLOOP-DAG: s_mov_b32 m0, [[SHL]]{{$}} + +; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16 +; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}} + +; NOLOOP-GISEL-DAG: s_lshl_b32 m0, [[READLANE]], 16 + ; NOLOOP-DAG: v_mov_b32_e32 v0, [[BAR_NUM]] ; NOLOOP: ds_gws_init v0 gds{{$}} define amdgpu_kernel void @gws_init_vgpr_offset(i32 %val) #0 { @@ -86,8 +98,12 @@ define amdgpu_kernel void @gws_init_vgpr_offset(i32 %val) #0 { ; GCN-LABEL: {{^}}gws_init_vgpr_offset_add: ; NOLOOP-DAG: s_load_dword [[BAR_NUM:s[0-9]+]] ; NOLOOP-DAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0 -; NOLOOP-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16 -; NOLOOP-DAG: s_mov_b32 m0, [[SHL]]{{$}} + +; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16 +; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}} + +; NOLOOP-GISEL-DAG: s_lshl_b32 m0, [[READLANE]], 16 + ; NOLOOP-DAG: v_mov_b32_e32 v0, [[BAR_NUM]] ; NOLOOP: ds_gws_init v0 offset:3 gds{{$}} define amdgpu_kernel void @gws_init_vgpr_offset_add(i32 %val) #0 {