mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
AMDGPU/GlobalISel: Select DS GWS intrinsics
This commit is contained in:
parent
cfc365cf6e
commit
28876d6a15
@ -1088,6 +1088,115 @@ bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
|
||||
return Ret;
|
||||
}
|
||||
|
||||
static unsigned gwsIntrinToOpcode(unsigned IntrID) {
|
||||
switch (IntrID) {
|
||||
case Intrinsic::amdgcn_ds_gws_init:
|
||||
return AMDGPU::DS_GWS_INIT;
|
||||
case Intrinsic::amdgcn_ds_gws_barrier:
|
||||
return AMDGPU::DS_GWS_BARRIER;
|
||||
case Intrinsic::amdgcn_ds_gws_sema_v:
|
||||
return AMDGPU::DS_GWS_SEMA_V;
|
||||
case Intrinsic::amdgcn_ds_gws_sema_br:
|
||||
return AMDGPU::DS_GWS_SEMA_BR;
|
||||
case Intrinsic::amdgcn_ds_gws_sema_p:
|
||||
return AMDGPU::DS_GWS_SEMA_P;
|
||||
case Intrinsic::amdgcn_ds_gws_sema_release_all:
|
||||
return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
|
||||
default:
|
||||
llvm_unreachable("not a gws intrinsic");
|
||||
}
|
||||
}
|
||||
|
||||
/// Select one of the amdgcn_ds_gws_* intrinsics into the matching DS_GWS_*
/// machine instruction.
///
/// The generic instruction carries the intrinsic ID, an optional value operand
/// (vsrc) and the resource offset. The offset is split into a variable part,
/// which is shifted left by 16 and written to M0, and a constant part, which
/// becomes the instruction's immediate offset field.
///
/// Every check that can fail runs before any instruction is emitted and before
/// an existing readfirstlane is rewritten, so a false return never leaves a
/// half-selected sequence next to the still-present \p MI.
///
/// \param MI  The intrinsic instruction to select; erased on success.
/// \param IID The GWS intrinsic being selected.
/// \returns true if \p MI was replaced, false if it cannot be selected.
bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
                                                     Intrinsic::ID IID) const {
  if (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
      !STI.hasGWSSemaReleaseAll())
    return false;

  // intrinsic ID, vsrc, offset
  const bool HasVSrc = MI.getNumOperands() == 3;
  assert(HasVSrc || MI.getNumOperands() == 2);

  Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg();
  const RegisterBank *OffsetRB = RBI.getRegBank(BaseOffset, *MRI, TRI);
  if (OffsetRB->getID() != AMDGPU::SGPRRegBankID)
    return false;

  // Constrain the value operand up front. Doing this only after the M0 setup
  // and the DS_GWS instruction were built would leave those instructions
  // behind if the constraint failed.
  Register VSrc;
  if (HasVSrc) {
    VSrc = MI.getOperand(1).getReg();
    if (!RBI.constrainGenericRegister(VSrc, AMDGPU::VGPR_32RegClass, *MRI))
      return false;
  }

  MachineInstr *OffsetDef = getDefIgnoringCopies(BaseOffset, *MRI);
  assert(OffsetDef && "GWS offset has no defining instruction");

  unsigned ImmOffset = 0;

  MachineBasicBlock *MBB = MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();

  MachineInstr *Readfirstlane = nullptr;

  // If we legalized the VGPR input, strip out the readfirstlane to analyze the
  // incoming offset, in case there's an add of a constant. We'll have to put it
  // back later.
  if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
    Readfirstlane = OffsetDef;
    BaseOffset = OffsetDef->getOperand(1).getReg();
    OffsetDef = getDefIgnoringCopies(BaseOffset, *MRI);
    assert(OffsetDef && "readfirstlane source has no defining instruction");
  }

  if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) {
    // If we have a constant offset, try to use the 0 in m0 as the base.
    // TODO: Look into changing the default m0 initialization value. If the
    // default -1 only set the low 16-bits, we could leave it as-is and add 1 to
    // the immediate offset.

    ImmOffset = OffsetDef->getOperand(1).getCImm()->getZExtValue();
    BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addImm(0);
  } else {
    std::tie(BaseOffset, ImmOffset, OffsetDef)
      = AMDGPU::getBaseWithConstantOffset(*MRI, BaseOffset);

    if (Readfirstlane) {
      // We have the constant offset now, so put the readfirstlane back on the
      // variable component.
      if (!RBI.constrainGenericRegister(BaseOffset, AMDGPU::VGPR_32RegClass,
                                        *MRI))
        return false;

      Readfirstlane->getOperand(1).setReg(BaseOffset);
      BaseOffset = Readfirstlane->getOperand(0).getReg();
    } else {
      if (!RBI.constrainGenericRegister(BaseOffset,
                                        AMDGPU::SReg_32RegClass, *MRI))
        return false;
    }

    // The variable part of the offset is shifted left by 16 before it is
    // copied into M0.
    Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_LSHL_B32), M0Base)
      .addReg(BaseOffset)
      .addImm(16);

    BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
      .addReg(M0Base);
  }

  // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
  // offset field) % 64. Some versions of the programming guide omit the m0
  // part, or claim it's from offset 0.
  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(gwsIntrinToOpcode(IID)));

  if (HasVSrc)
    MIB.addReg(VSrc);

  MIB.addImm(ImmOffset)
     .addImm(-1) // $gds
     .cloneMemRefs(MI);

  MI.eraseFromParent();
  return true;
}
|
||||
|
||||
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
|
||||
MachineInstr &I) const {
|
||||
MachineBasicBlock *BB = I.getParent();
|
||||
@ -1114,6 +1223,13 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
|
||||
case Intrinsic::amdgcn_ds_ordered_add:
|
||||
case Intrinsic::amdgcn_ds_ordered_swap:
|
||||
return selectDSOrderedIntrinsic(I, IntrinsicID);
|
||||
case Intrinsic::amdgcn_ds_gws_init:
|
||||
case Intrinsic::amdgcn_ds_gws_barrier:
|
||||
case Intrinsic::amdgcn_ds_gws_sema_v:
|
||||
case Intrinsic::amdgcn_ds_gws_sema_br:
|
||||
case Intrinsic::amdgcn_ds_gws_sema_p:
|
||||
case Intrinsic::amdgcn_ds_gws_sema_release_all:
|
||||
return selectDSGWSIntrinsic(I, IntrinsicID);
|
||||
default:
|
||||
return selectImpl(I, *CoverageInfo);
|
||||
}
|
||||
|
@ -101,6 +101,7 @@ private:
|
||||
|
||||
bool selectStoreIntrinsic(MachineInstr &MI, bool IsFormat) const;
|
||||
bool selectDSOrderedIntrinsic(MachineInstr &MI, Intrinsic::ID IID) const;
|
||||
bool selectDSGWSIntrinsic(MachineInstr &MI, Intrinsic::ID IID) const;
|
||||
|
||||
bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I) const;
|
||||
int getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) const;
|
||||
|
@ -0,0 +1,9 @@
|
||||
; XUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.barrier.ll
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.barrier.ll
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.barrier.ll
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL %S/../llvm.amdgcn.ds.gws.barrier.ll
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL,GFX10 %S/../llvm.amdgcn.ds.gws.barrier.ll
|
||||
|
||||
; Make sure the op is emitted bundled with a waitcnt with and without the retry loop, and the bundle is not removed by ExpandPostRAPseudos.
|
||||
; XUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -stop-after=postrapseudos -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefix=MIR %S/../llvm.amdgcn.ds.gws.barrier.ll
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=postrapseudos -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefix=MIR %S/../llvm.amdgcn.ds.gws.barrier.ll
|
@ -0,0 +1,5 @@
|
||||
; XUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.init.ll
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.init.ll
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.init.ll
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL %S/../llvm.amdgcn.ds.gws.init.ll
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL %S/../llvm.amdgcn.ds.gws.init.ll
|
@ -0,0 +1,5 @@
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.br.ll
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.br.ll
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.br.ll
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %S/../llvm.amdgcn.ds.gws.sema.br.ll
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,GFX10 %S/../llvm.amdgcn.ds.gws.sema.br.ll
|
@ -0,0 +1,8 @@
|
||||
; RUN: not llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - < %S/../llvm.amdgcn.ds.gws.sema.release.all.ll 2>&1 | FileCheck -enable-var-scope -check-prefix=GFX6ERR-GISEL %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.release.all.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.release.all.ll
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.release.all.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP,GFX8 %S/../llvm.amdgcn.ds.gws.sema.release.all.ll
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.release.all.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %S/../llvm.amdgcn.ds.gws.sema.release.all.ll
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.release.all.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,GFX10 %S/../llvm.amdgcn.ds.gws.sema.release.all.ll
|
||||
|
||||
; GFX6ERR-GISEL: LLVM ERROR: cannot select: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.release.all), %{{[0-9]+}}:sgpr(s32) :: (store 4 into custom "GWSResource") (in function: gws_sema_release_all_offset0)
|
||||
|
@ -0,0 +1,5 @@
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.v.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.v.ll
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.v.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.v.ll
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.v.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP,GFX8 %S/../llvm.amdgcn.ds.gws.sema.v.ll
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.v.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %S/../llvm.amdgcn.ds.gws.sema.v.ll
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.v.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,GFX10 %S/../llvm.amdgcn.ds.gws.sema.v.ll
|
@ -1,8 +1,8 @@
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %s
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,GFX10 %s
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG,GFX10 %s
|
||||
|
||||
; Make sure the op is emitted bundled with a waitcnt with and without the retry loop, and the bundle is not removed by ExpandPostRAPseudos.
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -stop-after=postrapseudos -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=MIR %s
|
||||
@ -51,8 +51,13 @@ define amdgpu_kernel void @gws_barrier_offset63(i32 %val) #0 {
|
||||
; FIXME: Should be able to shift directly into m0
|
||||
; GCN-LABEL: {{^}}gws_barrier_sgpr_offset:
|
||||
; NOLOOP-DAG: s_load_dwordx2 s{{\[}}[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]{{\]}}
|
||||
; NOLOOP-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
|
||||
; NOLOOP-DAG: s_mov_b32 m0, [[SHL]]{{$}}
|
||||
|
||||
; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
|
||||
; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
|
||||
|
||||
; NOLOOP-GISEL-DAG: s_lshl_b32 m0, s[[OFFSET]], 16
|
||||
|
||||
|
||||
; NOLOOP-DAG: v_mov_b32_e32 [[GWS_VAL:v[0-9]+]], s[[BAR_NUM]]
|
||||
; NOLOOP: ds_gws_barrier [[GWS_VAL]] gds{{$}}
|
||||
define amdgpu_kernel void @gws_barrier_sgpr_offset(i32 %val, i32 %offset) #0 {
|
||||
@ -63,8 +68,12 @@ define amdgpu_kernel void @gws_barrier_sgpr_offset(i32 %val, i32 %offset) #0 {
|
||||
; Variable offset in SGPR with constant add
|
||||
; GCN-LABEL: {{^}}gws_barrier_sgpr_offset_add1:
|
||||
; NOLOOP-DAG: s_load_dwordx2 s{{\[}}[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]{{\]}}
|
||||
; NOLOOP-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
|
||||
; NOLOOP-DAG: s_mov_b32 m0, [[SHL]]{{$}}
|
||||
|
||||
; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
|
||||
; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
|
||||
|
||||
; NOLOOP-GISEL-DAG: s_lshl_b32 m0, s[[OFFSET]], 16
|
||||
|
||||
; NOLOOP-DAG: v_mov_b32_e32 [[GWS_VAL:v[0-9]+]], s[[BAR_NUM]]
|
||||
; NOLOOP: ds_gws_barrier [[GWS_VAL]] offset:1 gds{{$}}
|
||||
define amdgpu_kernel void @gws_barrier_sgpr_offset_add1(i32 %val, i32 %offset.base) #0 {
|
||||
@ -76,8 +85,12 @@ define amdgpu_kernel void @gws_barrier_sgpr_offset_add1(i32 %val, i32 %offset.ba
|
||||
; GCN-LABEL: {{^}}gws_barrier_vgpr_offset:
|
||||
; NOLOOP-DAG: s_load_dword [[BAR_NUM:s[0-9]+]]
|
||||
; NOLOOP-DAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0
|
||||
; NOLOOP-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
|
||||
; NOLOOP-DAG: s_mov_b32 m0, [[SHL]]{{$}}
|
||||
|
||||
; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
|
||||
; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
|
||||
|
||||
; NOLOOP-GISEL-DAG: s_lshl_b32 m0, [[READLANE]], 16
|
||||
|
||||
; NOLOOP-DAG: v_mov_b32_e32 [[GWS_VAL:v[0-9]+]], [[BAR_NUM]]
|
||||
; NOLOOP: ds_gws_barrier [[GWS_VAL]] gds{{$}}
|
||||
define amdgpu_kernel void @gws_barrier_vgpr_offset(i32 %val) #0 {
|
||||
@ -90,8 +103,12 @@ define amdgpu_kernel void @gws_barrier_vgpr_offset(i32 %val) #0 {
|
||||
; GCN-LABEL: {{^}}gws_barrier_vgpr_offset_add:
|
||||
; NOLOOP-DAG: s_load_dword [[BAR_NUM:s[0-9]+]]
|
||||
; NOLOOP-DAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0
|
||||
; NOLOOP-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
|
||||
; NOLOOP-DAG: s_mov_b32 m0, [[SHL]]{{$}}
|
||||
|
||||
; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
|
||||
; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
|
||||
|
||||
; NOLOOP-GISEL-DAG: s_lshl_b32 m0, [[READLANE]], 16
|
||||
|
||||
; NOLOOP-DAG: v_mov_b32_e32 [[GWS_VAL:v[0-9]+]], [[BAR_NUM]]
|
||||
; NOLOOP: ds_gws_barrier [[GWS_VAL]] offset:3 gds{{$}}
|
||||
define amdgpu_kernel void @gws_barrier_vgpr_offset_add(i32 %val) #0 {
|
||||
|
@ -1,8 +1,8 @@
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %s
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %s
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s
|
||||
|
||||
; Minimum offset
|
||||
; GCN-LABEL: {{^}}gws_init_offset0:
|
||||
@ -47,8 +47,12 @@ define amdgpu_kernel void @gws_init_offset63(i32 %val) #0 {
|
||||
; FIXME: Should be able to shift directly into m0
|
||||
; GCN-LABEL: {{^}}gws_init_sgpr_offset:
|
||||
; NOLOOP-DAG: s_load_dwordx2 s{{\[}}[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]{{\]}}
|
||||
; NOLOOP-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
|
||||
; NOLOOP-DAG: s_mov_b32 m0, [[SHL]]{{$}}
|
||||
|
||||
; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
|
||||
; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
|
||||
|
||||
; NOLOOP-GISEL-DAG: s_lshl_b32 m0, s[[OFFSET]], 16
|
||||
|
||||
; NOLOOP-DAG: v_mov_b32_e32 [[GWS_VAL:v[0-9]+]], s[[BAR_NUM]]
|
||||
; NOLOOP: ds_gws_init [[GWS_VAL]] gds{{$}}
|
||||
define amdgpu_kernel void @gws_init_sgpr_offset(i32 %val, i32 %offset) #0 {
|
||||
@ -59,8 +63,12 @@ define amdgpu_kernel void @gws_init_sgpr_offset(i32 %val, i32 %offset) #0 {
|
||||
; Variable offset in SGPR with constant add
|
||||
; GCN-LABEL: {{^}}gws_init_sgpr_offset_add1:
|
||||
; NOLOOP-DAG: s_load_dwordx2 s{{\[}}[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]{{\]}}
|
||||
; NOLOOP-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
|
||||
; NOLOOP-DAG: s_mov_b32 m0, [[SHL]]{{$}}
|
||||
|
||||
; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
|
||||
; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
|
||||
|
||||
; NOLOOP-GISEL-DAG: s_lshl_b32 m0, s[[OFFSET]], 16
|
||||
|
||||
; NOLOOP-DAG: v_mov_b32_e32 [[GWS_VAL:v[0-9]+]], s[[BAR_NUM]]
|
||||
; NOLOOP: ds_gws_init [[GWS_VAL]] offset:1 gds{{$}}
|
||||
define amdgpu_kernel void @gws_init_sgpr_offset_add1(i32 %val, i32 %offset.base) #0 {
|
||||
@ -72,8 +80,12 @@ define amdgpu_kernel void @gws_init_sgpr_offset_add1(i32 %val, i32 %offset.base)
|
||||
; GCN-LABEL: {{^}}gws_init_vgpr_offset:
|
||||
; NOLOOP-DAG: s_load_dword [[BAR_NUM:s[0-9]+]]
|
||||
; NOLOOP-DAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0
|
||||
; NOLOOP-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
|
||||
; NOLOOP-DAG: s_mov_b32 m0, [[SHL]]{{$}}
|
||||
|
||||
; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
|
||||
; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
|
||||
|
||||
; NOLOOP-GISEL-DAG: s_lshl_b32 m0, [[READLANE]], 16
|
||||
|
||||
; NOLOOP-DAG: v_mov_b32_e32 v0, [[BAR_NUM]]
|
||||
; NOLOOP: ds_gws_init v0 gds{{$}}
|
||||
define amdgpu_kernel void @gws_init_vgpr_offset(i32 %val) #0 {
|
||||
@ -86,8 +98,12 @@ define amdgpu_kernel void @gws_init_vgpr_offset(i32 %val) #0 {
|
||||
; GCN-LABEL: {{^}}gws_init_vgpr_offset_add:
|
||||
; NOLOOP-DAG: s_load_dword [[BAR_NUM:s[0-9]+]]
|
||||
; NOLOOP-DAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0
|
||||
; NOLOOP-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
|
||||
; NOLOOP-DAG: s_mov_b32 m0, [[SHL]]{{$}}
|
||||
|
||||
; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
|
||||
; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
|
||||
|
||||
; NOLOOP-GISEL-DAG: s_lshl_b32 m0, [[READLANE]], 16
|
||||
|
||||
; NOLOOP-DAG: v_mov_b32_e32 v0, [[BAR_NUM]]
|
||||
; NOLOOP: ds_gws_init v0 offset:3 gds{{$}}
|
||||
define amdgpu_kernel void @gws_init_vgpr_offset_add(i32 %val) #0 {
|
||||
|
Loading…
x
Reference in New Issue
Block a user