mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-20 19:42:54 +02:00
AMDGPU: Fold immediate offset into BUFFER_LOAD_DWORD lowered from SMEM
Summary:
-5.3% code size in affected shaders.

Changed stats only: 48486 shaders in 30489 tests

Totals:
  SGPRS: 2086406 -> 2072430 (-0.67 %)
  VGPRS: 1626872 -> 1627960 (0.07 %)
  Spilled SGPRs: 7865 -> 7912 (0.60 %)
  Code Size: 60978060 -> 60188764 (-1.29 %) bytes
  Max Waves: 374530 -> 374342 (-0.05 %)

Totals from affected shaders:
  SGPRS: 299664 -> 285688 (-4.66 %)
  VGPRS: 233844 -> 234932 (0.47 %)
  Spilled SGPRs: 3959 -> 4006 (1.19 %)
  Code Size: 14905272 -> 14115976 (-5.30 %) bytes
  Max Waves: 46202 -> 46014 (-0.41 %)

Reviewers: arsenm, nhaehnle
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D38915
llvm-svn: 317750
This commit is contained in:
parent
f7a8beb9b6
commit
438fa3e3f9
@ -983,14 +983,6 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool isLegalMUBUFImmOffset(unsigned Imm) {
|
||||
return isUInt<12>(Imm);
|
||||
}
|
||||
|
||||
static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
|
||||
return isLegalMUBUFImmOffset(Imm->getZExtValue());
|
||||
}
|
||||
|
||||
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
|
||||
SDValue &VAddr, SDValue &SOffset,
|
||||
SDValue &Offset, SDValue &Offen,
|
||||
@ -1032,7 +1024,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
|
||||
Ptr = N0;
|
||||
}
|
||||
|
||||
if (isLegalMUBUFImmOffset(C1)) {
|
||||
if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
|
||||
Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
|
||||
return true;
|
||||
}
|
||||
@ -1142,7 +1134,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
|
||||
|
||||
if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
|
||||
unsigned Imm = CAddr->getZExtValue();
|
||||
assert(!isLegalMUBUFImmOffset(Imm) &&
|
||||
assert(!SIInstrInfo::isLegalMUBUFImmOffset(Imm) &&
|
||||
"should have been selected by other pattern");
|
||||
|
||||
SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
|
||||
@ -1169,7 +1161,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
|
||||
|
||||
// Offsets in vaddr must be positive.
|
||||
ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
|
||||
if (isLegalMUBUFImmOffset(C1)) {
|
||||
if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
|
||||
std::tie(VAddr, SOffset) = foldFrameIndex(N0);
|
||||
ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
|
||||
return true;
|
||||
@ -1188,7 +1180,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
|
||||
SDValue &SOffset,
|
||||
SDValue &Offset) const {
|
||||
ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
|
||||
if (!CAddr || !isLegalMUBUFImmOffset(CAddr))
|
||||
if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
|
||||
return false;
|
||||
|
||||
SDLoc DL(Addr);
|
||||
|
@ -3712,13 +3712,43 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
|
||||
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR: {
|
||||
unsigned VDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
|
||||
const MachineOperand *VAddr = getNamedOperand(Inst, AMDGPU::OpName::soff);
|
||||
auto Add = MRI.getUniqueVRegDef(VAddr->getReg());
|
||||
unsigned Offset = 0;
|
||||
|
||||
// See if we can extract an immediate offset by recognizing one of these:
|
||||
// V_ADD_I32_e32 dst, imm, src1
|
||||
// V_ADD_I32_e32 dst, (S_MOV_B32 imm), src1
|
||||
// V_ADD will be removed by "Remove dead machine instructions".
|
||||
if (Add && Add->getOpcode() == AMDGPU::V_ADD_I32_e32) {
|
||||
const MachineOperand *Src =
|
||||
getNamedOperand(*Add, AMDGPU::OpName::src0);
|
||||
|
||||
if (Src && Src->isReg()) {
|
||||
auto Mov = MRI.getUniqueVRegDef(Src->getReg());
|
||||
if (Mov && Mov->getOpcode() == AMDGPU::S_MOV_B32)
|
||||
Src = &Mov->getOperand(1);
|
||||
}
|
||||
|
||||
if (Src) {
|
||||
if (Src->isImm())
|
||||
Offset = Src->getImm();
|
||||
else if (Src->isCImm())
|
||||
Offset = Src->getCImm()->getZExtValue();
|
||||
}
|
||||
|
||||
if (Offset && isLegalMUBUFImmOffset(Offset))
|
||||
VAddr = getNamedOperand(*Add, AMDGPU::OpName::src1);
|
||||
else
|
||||
Offset = 0;
|
||||
}
|
||||
|
||||
BuildMI(*MBB, Inst, Inst.getDebugLoc(),
|
||||
get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), VDst)
|
||||
.add(*getNamedOperand(Inst, AMDGPU::OpName::soff)) // vaddr
|
||||
.add(*VAddr) // vaddr
|
||||
.add(*getNamedOperand(Inst, AMDGPU::OpName::sbase)) // srsrc
|
||||
.addImm(0) // soffset
|
||||
.addImm(0) // offset
|
||||
.addImm(Offset) // offset
|
||||
.addImm(getNamedOperand(Inst, AMDGPU::OpName::glc)->getImm())
|
||||
.addImm(0) // slc
|
||||
.addImm(0) // tfe
|
||||
|
@ -860,6 +860,10 @@ public:
|
||||
|
||||
static bool isKillTerminator(unsigned Opcode);
|
||||
const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;
|
||||
|
||||
static bool isLegalMUBUFImmOffset(unsigned Imm) {
|
||||
return isUInt<12>(Imm);
|
||||
}
|
||||
};
|
||||
|
||||
namespace AMDGPU {
|
||||
|
@ -191,6 +191,27 @@ main_body:
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}smrd_vgpr_offset_imm:
|
||||
; GCN-NEXT: BB#
|
||||
; GCN-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen offset:4095 ;
|
||||
define amdgpu_ps float @smrd_vgpr_offset_imm(<4 x i32> inreg %desc, i32 %offset) #0 {
|
||||
main_body:
|
||||
%off = add i32 %offset, 4095
|
||||
%r = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 %off)
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}smrd_vgpr_offset_imm_too_large:
|
||||
; GCN-NEXT: BB#
|
||||
; GCN-NEXT: v_add_i32_e32 v0, vcc, 0x1000, v0
|
||||
; GCN-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen ;
|
||||
define amdgpu_ps float @smrd_vgpr_offset_imm_too_large(<4 x i32> inreg %desc, i32 %offset) #0 {
|
||||
main_body:
|
||||
%off = add i32 %offset, 4096
|
||||
%r = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 %off)
|
||||
ret float %r
|
||||
}
|
||||
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user