mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
AMDGPU: Select s_buffer_load_dword with a non-constant SGPR offset
Summary:
Apps that benefit:
- alien isolation
- bioshock infinite
- civilization: beyond earth
- company of heroes 2
- dirt showdown
- dota 2
- F1 2015
- grid autosport
- hitman
- legend of grimrock
- serious sam 3: bfe
- shadow warrior
- talos principle
- total war: warhammer
- UE4 demos: effects cave, elemental, sun temple

Reviewers: arsenm, nhaehnle

Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye

Differential Revision: https://reviews.llvm.org/D38914

llvm-svn: 317038
This commit is contained in:
parent
5bd9a961dd
commit
5cb35b4bac
@ -169,7 +169,6 @@ private:
|
||||
bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
|
||||
bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
|
||||
bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
|
||||
bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
|
||||
bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
|
||||
|
||||
bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
|
||||
@ -1466,13 +1465,6 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
|
||||
return !Imm && isa<ConstantSDNode>(Offset);
|
||||
}
|
||||
|
||||
/// Selector for an SMRD buffer offset that is neither an encodable immediate
/// nor a constant node.
///
/// \param Addr   Offset expression to match; forwarded to SelectSMRDOffset.
/// \param Offset Output operand; passed by reference to SelectSMRDOffset.
/// \returns true only when SelectSMRDOffset succeeds, reports the offset as
///          non-immediate (Imm is false), and Offset is not a
///          ConstantSDNode.
bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
|
||||
SDValue &Offset) const {
|
||||
// Imm is left uninitialized here; the short-circuit && below reads it only
// after SelectSMRDOffset has returned true.
bool Imm;
|
||||
return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
|
||||
!isa<ConstantSDNode>(Offset);
|
||||
}
|
||||
|
||||
bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
|
||||
SDValue &Base,
|
||||
SDValue &Offset) const {
|
||||
|
@ -878,13 +878,6 @@ def BUFFER_WBINVL1_VOL : MUBUF_Invalidate <"buffer_wbinvl1_vol",
|
||||
// MUBUF Patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Offset in a 32-bit VGPR.
// Maps SIload_constant (v4i32 resource $sbase, i32 offset $voff) onto
// BUFFER_LOAD_DWORD_OFFEN with $voff and $sbase as its first two operands and
// every remaining operand set to zero.
|
||||
def : GCNPat <
|
||||
(SIload_constant v4i32:$sbase, i32:$voff),
|
||||
(BUFFER_LOAD_DWORD_OFFEN $voff, $sbase, (i32 0), 0, 0, 0, 0)
|
||||
>;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// buffer_load/store_format patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -3709,6 +3709,27 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
|
||||
splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32);
|
||||
Inst.eraseFromParent();
|
||||
continue;
|
||||
|
||||
// Replace S_BUFFER_LOAD_DWORD_SGPR with BUFFER_LOAD_DWORD_OFFEN: the scalar
// load's soff operand is reused as vaddr and its sbase operand as srsrc, and
// the result is written to a newly created VGPR_32 virtual register.
case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR: {
|
||||
unsigned VDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
|
||||
|
||||
// Build the replacement immediately before Inst, keeping its debug location.
BuildMI(*MBB, Inst, Inst.getDebugLoc(),
|
||||
get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), VDst)
|
||||
.add(*getNamedOperand(Inst, AMDGPU::OpName::soff)) // vaddr
|
||||
.add(*getNamedOperand(Inst, AMDGPU::OpName::sbase)) // srsrc
|
||||
.addImm(0) // soffset
|
||||
.addImm(0) // offset
|
||||
.addImm(getNamedOperand(Inst, AMDGPU::OpName::glc)->getImm()) // glc, copied from Inst
|
||||
.addImm(0) // slc
|
||||
.addImm(0) // tfe
|
||||
.setMemRefs(Inst.memoperands_begin(), Inst.memoperands_end());
|
||||
|
||||
// Redirect all references to the old sdst register to the new VGPR result.
MRI.replaceRegWith(getNamedOperand(Inst, AMDGPU::OpName::sdst)->getReg(),
|
||||
VDst);
|
||||
// Hand VDst's users to the worklist helper, then drop the original
// instruction and move on to the next worklist entry.
addUsersToMoveToVALUWorklist(VDst, MRI, Worklist);
|
||||
Inst.eraseFromParent();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
|
||||
|
@ -239,7 +239,6 @@ def SMRDImm32 : ComplexPattern<i64, 2, "SelectSMRDImm32">;
|
||||
def SMRDSgpr : ComplexPattern<i64, 2, "SelectSMRDSgpr">;
|
||||
def SMRDBufferImm : ComplexPattern<i32, 1, "SelectSMRDBufferImm">;
|
||||
def SMRDBufferImm32 : ComplexPattern<i32, 1, "SelectSMRDBufferImm32">;
|
||||
def SMRDBufferSgpr : ComplexPattern<i32, 1, "SelectSMRDBufferSgpr">;
|
||||
|
||||
multiclass SMRD_Pattern <string Instr, ValueType vt> {
|
||||
|
||||
@ -282,7 +281,7 @@ def SM_LOAD_PATTERN : GCNPat < // name this pattern to reuse AddedComplexity on
|
||||
|
||||
// 2. Offset loaded in a 32-bit SGPR
|
||||
def : GCNPat <
|
||||
(SIload_constant v4i32:$sbase, (SMRDBufferSgpr i32:$offset)),
|
||||
(SIload_constant v4i32:$sbase, i32:$offset),
|
||||
(S_BUFFER_LOAD_DWORD_SGPR $sbase, $offset, 0)
|
||||
>;
|
||||
|
||||
|
@ -175,6 +175,22 @@ main_body:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Constant-buffer load whose offset argument is marked inreg. The check below
; expects an s_buffer_load_dword with descriptor s[0:3] and the offset taken
; directly from s4.
; GCN-LABEL: {{^}}smrd_sgpr_offset:
|
||||
; GCN: s_buffer_load_dword s{{[0-9]}}, s[0:3], s4
|
||||
define amdgpu_ps float @smrd_sgpr_offset(<4 x i32> inreg %desc, i32 inreg %offset) #0 {
|
||||
main_body:
|
||||
%r = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 %offset)
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Same load as above, but the offset argument is not marked inreg. The check
; below expects a buffer_load_dword in offen mode with the offset in v0 and
; descriptor s[0:3].
; GCN-LABEL: {{^}}smrd_vgpr_offset:
|
||||
; GCN: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen ;
|
||||
define amdgpu_ps float @smrd_vgpr_offset(<4 x i32> inreg %desc, i32 %offset) #0 {
|
||||
main_body:
|
||||
%r = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 %offset)
|
||||
ret float %r
|
||||
}
|
||||
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user