1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

AMDGPU/GlobalISel: Select DS GWS intrinsics

This commit is contained in:
Matt Arsenault 2020-01-12 19:04:24 -05:00 committed by Matt Arsenault
parent cfc365cf6e
commit 28876d6a15
9 changed files with 202 additions and 20 deletions

View File

@ -1088,6 +1088,115 @@ bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
return Ret;
}
static unsigned gwsIntrinToOpcode(unsigned IntrID) {
switch (IntrID) {
case Intrinsic::amdgcn_ds_gws_init:
return AMDGPU::DS_GWS_INIT;
case Intrinsic::amdgcn_ds_gws_barrier:
return AMDGPU::DS_GWS_BARRIER;
case Intrinsic::amdgcn_ds_gws_sema_v:
return AMDGPU::DS_GWS_SEMA_V;
case Intrinsic::amdgcn_ds_gws_sema_br:
return AMDGPU::DS_GWS_SEMA_BR;
case Intrinsic::amdgcn_ds_gws_sema_p:
return AMDGPU::DS_GWS_SEMA_P;
case Intrinsic::amdgcn_ds_gws_sema_release_all:
return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
default:
llvm_unreachable("not a gws intrinsic");
}
}
bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
Intrinsic::ID IID) const {
if (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
!STI.hasGWSSemaReleaseAll())
return false;
// intrinsic ID, vsrc, offset
const bool HasVSrc = MI.getNumOperands() == 3;
assert(HasVSrc || MI.getNumOperands() == 2);
Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg();
const RegisterBank *OffsetRB = RBI.getRegBank(BaseOffset, *MRI, TRI);
if (OffsetRB->getID() != AMDGPU::SGPRRegBankID)
return false;
MachineInstr *OffsetDef = getDefIgnoringCopies(BaseOffset, *MRI);
assert(OffsetDef);
unsigned ImmOffset;
MachineBasicBlock *MBB = MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
MachineInstr *Readfirstlane = nullptr;
// If we legalized the VGPR input, strip out the readfirstlane to analyze the
// incoming offset, in case there's an add of a constant. We'll have to put it
// back later.
if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
Readfirstlane = OffsetDef;
BaseOffset = OffsetDef->getOperand(1).getReg();
OffsetDef = getDefIgnoringCopies(BaseOffset, *MRI);
}
if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) {
// If we have a constant offset, try to use the 0 in m0 as the base.
// TODO: Look into changing the default m0 initialization value. If the
// default -1 only set the low 16-bits, we could leave it as-is and add 1 to
// the immediate offset.
ImmOffset = OffsetDef->getOperand(1).getCImm()->getZExtValue();
BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
.addImm(0);
} else {
std::tie(BaseOffset, ImmOffset, OffsetDef)
= AMDGPU::getBaseWithConstantOffset(*MRI, BaseOffset);
if (Readfirstlane) {
// We have the constant offset now, so put the readfirstlane back on the
// variable component.
if (!RBI.constrainGenericRegister(BaseOffset, AMDGPU::VGPR_32RegClass, *MRI))
return false;
Readfirstlane->getOperand(1).setReg(BaseOffset);
BaseOffset = Readfirstlane->getOperand(0).getReg();
} else {
if (!RBI.constrainGenericRegister(BaseOffset,
AMDGPU::SReg_32RegClass, *MRI))
return false;
}
Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_LSHL_B32), M0Base)
.addReg(BaseOffset)
.addImm(16);
BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
.addReg(M0Base);
}
// The resource id offset is computed as (<isa opaque base> + M0[21:16] +
// offset field) % 64. Some versions of the programming guide omit the m0
// part, or claim it's from offset 0.
auto MIB = BuildMI(*MBB, &MI, DL, TII.get(gwsIntrinToOpcode(IID)));
if (HasVSrc) {
Register VSrc = MI.getOperand(1).getReg();
MIB.addReg(VSrc);
if (!RBI.constrainGenericRegister(VSrc, AMDGPU::VGPR_32RegClass, *MRI))
return false;
}
MIB.addImm(ImmOffset)
.addImm(-1) // $gds
.cloneMemRefs(MI);
MI.eraseFromParent();
return true;
}
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
@ -1114,6 +1223,13 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap:
return selectDSOrderedIntrinsic(I, IntrinsicID);
case Intrinsic::amdgcn_ds_gws_init:
case Intrinsic::amdgcn_ds_gws_barrier:
case Intrinsic::amdgcn_ds_gws_sema_v:
case Intrinsic::amdgcn_ds_gws_sema_br:
case Intrinsic::amdgcn_ds_gws_sema_p:
case Intrinsic::amdgcn_ds_gws_sema_release_all:
return selectDSGWSIntrinsic(I, IntrinsicID);
default:
return selectImpl(I, *CoverageInfo);
}

View File

@ -101,6 +101,7 @@ private:
bool selectStoreIntrinsic(MachineInstr &MI, bool IsFormat) const;
bool selectDSOrderedIntrinsic(MachineInstr &MI, Intrinsic::ID IID) const;
bool selectDSGWSIntrinsic(MachineInstr &MI, Intrinsic::ID IID) const;
bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I) const;
int getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) const;

View File

@ -0,0 +1,9 @@
; XUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.barrier.ll
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.barrier.ll
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.barrier.ll
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL %S/../llvm.amdgcn.ds.gws.barrier.ll
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL,GFX10 %S/../llvm.amdgcn.ds.gws.barrier.ll
; Make sure the op is emitted bundled with a waitcnt with and without the retry loop, and the bundle is not removed by ExpandPostRAPseudos.
; XUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -stop-after=postrapseudos -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefix=MIR %S/../llvm.amdgcn.ds.gws.barrier.ll
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=postrapseudos -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefix=MIR %S/../llvm.amdgcn.ds.gws.barrier.ll

View File

@ -0,0 +1,5 @@
; XUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.init.ll
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.init.ll
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.init.ll
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL %S/../llvm.amdgcn.ds.gws.init.ll
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL %S/../llvm.amdgcn.ds.gws.init.ll

View File

@ -0,0 +1,5 @@
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.br.ll
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.br.ll
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.br.ll
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %S/../llvm.amdgcn.ds.gws.sema.br.ll
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,GFX10 %S/../llvm.amdgcn.ds.gws.sema.br.ll

View File

@ -0,0 +1,8 @@
; RUN: not llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - < %S/../llvm.amdgcn.ds.gws.sema.release.all.ll 2>&1 | FileCheck -enable-var-scope -check-prefix=GFX6ERR-GISEL %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.release.all.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.release.all.ll
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.release.all.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP,GFX8 %S/../llvm.amdgcn.ds.gws.sema.release.all.ll
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.release.all.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %S/../llvm.amdgcn.ds.gws.sema.release.all.ll
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.release.all.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,GFX10 %S/../llvm.amdgcn.ds.gws.sema.release.all.ll
; GFX6ERR-GISEL: LLVM ERROR: cannot select: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.release.all), %{{[0-9]+}}:sgpr(s32) :: (store 4 into custom "GWSResource") (in function: gws_sema_release_all_offset0)

View File

@ -0,0 +1,5 @@
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.v.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.v.ll
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.v.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.v.ll
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.v.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP,GFX8 %S/../llvm.amdgcn.ds.gws.sema.v.ll
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.v.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %S/../llvm.amdgcn.ds.gws.sema.v.ll
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.v.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,GFX10 %S/../llvm.amdgcn.ds.gws.sema.v.ll

View File

@ -1,8 +1,8 @@
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,GFX10 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG,GFX10 %s
; Make sure the op is emitted bundled with a waitcnt with and without the retry loop, and the bundle is not removed by ExpandPostRAPseudos.
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -stop-after=postrapseudos -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=MIR %s
@ -51,8 +51,13 @@ define amdgpu_kernel void @gws_barrier_offset63(i32 %val) #0 {
; FIXME: Should be able to shift directly into m0
; GCN-LABEL: {{^}}gws_barrier_sgpr_offset:
; NOLOOP-DAG: s_load_dwordx2 s{{\[}}[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]{{\]}}
; NOLOOP-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
; NOLOOP-DAG: s_mov_b32 m0, [[SHL]]{{$}}
; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
; NOLOOP-GISEL-DAG: s_lshl_b32 m0, s[[OFFSET]], 16
; NOLOOP-DAG: v_mov_b32_e32 [[GWS_VAL:v[0-9]+]], s[[BAR_NUM]]
; NOLOOP: ds_gws_barrier [[GWS_VAL]] gds{{$}}
define amdgpu_kernel void @gws_barrier_sgpr_offset(i32 %val, i32 %offset) #0 {
@ -63,8 +68,12 @@ define amdgpu_kernel void @gws_barrier_sgpr_offset(i32 %val, i32 %offset) #0 {
; Variable offset in SGPR with constant add
; GCN-LABEL: {{^}}gws_barrier_sgpr_offset_add1:
; NOLOOP-DAG: s_load_dwordx2 s{{\[}}[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]{{\]}}
; NOLOOP-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
; NOLOOP-DAG: s_mov_b32 m0, [[SHL]]{{$}}
; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
; NOLOOP-GISEL-DAG: s_lshl_b32 m0, s[[OFFSET]], 16
; NOLOOP-DAG: v_mov_b32_e32 [[GWS_VAL:v[0-9]+]], s[[BAR_NUM]]
; NOLOOP: ds_gws_barrier [[GWS_VAL]] offset:1 gds{{$}}
define amdgpu_kernel void @gws_barrier_sgpr_offset_add1(i32 %val, i32 %offset.base) #0 {
@ -76,8 +85,12 @@ define amdgpu_kernel void @gws_barrier_sgpr_offset_add1(i32 %val, i32 %offset.ba
; GCN-LABEL: {{^}}gws_barrier_vgpr_offset:
; NOLOOP-DAG: s_load_dword [[BAR_NUM:s[0-9]+]]
; NOLOOP-DAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0
; NOLOOP-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
; NOLOOP-DAG: s_mov_b32 m0, [[SHL]]{{$}}
; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
; NOLOOP-GISEL-DAG: s_lshl_b32 m0, [[READLANE]], 16
; NOLOOP-DAG: v_mov_b32_e32 [[GWS_VAL:v[0-9]+]], [[BAR_NUM]]
; NOLOOP: ds_gws_barrier [[GWS_VAL]] gds{{$}}
define amdgpu_kernel void @gws_barrier_vgpr_offset(i32 %val) #0 {
@ -90,8 +103,12 @@ define amdgpu_kernel void @gws_barrier_vgpr_offset(i32 %val) #0 {
; GCN-LABEL: {{^}}gws_barrier_vgpr_offset_add:
; NOLOOP-DAG: s_load_dword [[BAR_NUM:s[0-9]+]]
; NOLOOP-DAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0
; NOLOOP-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
; NOLOOP-DAG: s_mov_b32 m0, [[SHL]]{{$}}
; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
; NOLOOP-GISEL-DAG: s_lshl_b32 m0, [[READLANE]], 16
; NOLOOP-DAG: v_mov_b32_e32 [[GWS_VAL:v[0-9]+]], [[BAR_NUM]]
; NOLOOP: ds_gws_barrier [[GWS_VAL]] offset:3 gds{{$}}
define amdgpu_kernel void @gws_barrier_vgpr_offset_add(i32 %val) #0 {

View File

@ -1,8 +1,8 @@
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s
; Minimum offset
; GCN-LABEL: {{^}}gws_init_offset0:
@ -47,8 +47,12 @@ define amdgpu_kernel void @gws_init_offset63(i32 %val) #0 {
; FIXME: Should be able to shift directly into m0
; GCN-LABEL: {{^}}gws_init_sgpr_offset:
; NOLOOP-DAG: s_load_dwordx2 s{{\[}}[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]{{\]}}
; NOLOOP-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
; NOLOOP-DAG: s_mov_b32 m0, [[SHL]]{{$}}
; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
; NOLOOP-GISEL-DAG: s_lshl_b32 m0, s[[OFFSET]], 16
; NOLOOP-DAG: v_mov_b32_e32 [[GWS_VAL:v[0-9]+]], s[[BAR_NUM]]
; NOLOOP: ds_gws_init [[GWS_VAL]] gds{{$}}
define amdgpu_kernel void @gws_init_sgpr_offset(i32 %val, i32 %offset) #0 {
@ -59,8 +63,12 @@ define amdgpu_kernel void @gws_init_sgpr_offset(i32 %val, i32 %offset) #0 {
; Variable offset in SGPR with constant add
; GCN-LABEL: {{^}}gws_init_sgpr_offset_add1:
; NOLOOP-DAG: s_load_dwordx2 s{{\[}}[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]{{\]}}
; NOLOOP-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
; NOLOOP-DAG: s_mov_b32 m0, [[SHL]]{{$}}
; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
; NOLOOP-GISEL-DAG: s_lshl_b32 m0, s[[OFFSET]], 16
; NOLOOP-DAG: v_mov_b32_e32 [[GWS_VAL:v[0-9]+]], s[[BAR_NUM]]
; NOLOOP: ds_gws_init [[GWS_VAL]] offset:1 gds{{$}}
define amdgpu_kernel void @gws_init_sgpr_offset_add1(i32 %val, i32 %offset.base) #0 {
@ -72,8 +80,12 @@ define amdgpu_kernel void @gws_init_sgpr_offset_add1(i32 %val, i32 %offset.base)
; GCN-LABEL: {{^}}gws_init_vgpr_offset:
; NOLOOP-DAG: s_load_dword [[BAR_NUM:s[0-9]+]]
; NOLOOP-DAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0
; NOLOOP-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
; NOLOOP-DAG: s_mov_b32 m0, [[SHL]]{{$}}
; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
; NOLOOP-GISEL-DAG: s_lshl_b32 m0, [[READLANE]], 16
; NOLOOP-DAG: v_mov_b32_e32 v0, [[BAR_NUM]]
; NOLOOP: ds_gws_init v0 gds{{$}}
define amdgpu_kernel void @gws_init_vgpr_offset(i32 %val) #0 {
@ -86,8 +98,12 @@ define amdgpu_kernel void @gws_init_vgpr_offset(i32 %val) #0 {
; GCN-LABEL: {{^}}gws_init_vgpr_offset_add:
; NOLOOP-DAG: s_load_dword [[BAR_NUM:s[0-9]+]]
; NOLOOP-DAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0
; NOLOOP-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
; NOLOOP-DAG: s_mov_b32 m0, [[SHL]]{{$}}
; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
; NOLOOP-GISEL-DAG: s_lshl_b32 m0, [[READLANE]], 16
; NOLOOP-DAG: v_mov_b32_e32 v0, [[BAR_NUM]]
; NOLOOP: ds_gws_init v0 offset:3 gds{{$}}
define amdgpu_kernel void @gws_init_vgpr_offset_add(i32 %val) #0 {