1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

AMDGPU/GlobalISel: Select llvm.amdgcn.s.buffer.load

Unlike the DAG lowering, this doesn't try to fail on the dlc bit
pre-gfx10.
This commit is contained in:
Matt Arsenault 2020-01-29 20:34:32 -05:00
parent 49633b5f44
commit 2a8cc659dc
5 changed files with 4336 additions and 3 deletions

View File

@ -108,6 +108,14 @@ def gi_mubuf_offset_atomic :
GIComplexOperandMatcher<s64, "selectMUBUFOffsetAtomic">, GIComplexOperandMatcher<s64, "selectMUBUFOffsetAtomic">,
GIComplexPatternEquiv<MUBUFOffsetAtomic>; GIComplexPatternEquiv<MUBUFOffsetAtomic>;
// GlobalISel equivalent of the SMRDBufferImm complex pattern. Operands are
// matched by the instruction selector's selectSMRDBufferImm method.
def gi_smrd_buffer_imm :
GIComplexOperandMatcher<s64, "selectSMRDBufferImm">,
GIComplexPatternEquiv<SMRDBufferImm>;
// GlobalISel equivalent of the SMRDBufferImm32 complex pattern. Operands are
// matched by the instruction selector's selectSMRDBufferImm32 method.
def gi_smrd_buffer_imm32 :
GIComplexOperandMatcher<s64, "selectSMRDBufferImm32">,
GIComplexPatternEquiv<SMRDBufferImm32>;
// Separate load nodes are defined to glue m0 initialization in // Separate load nodes are defined to glue m0 initialization in
// SelectionDAG. The GISel selector can just insert m0 initialization // SelectionDAG. The GISel selector can just insert m0 initialization
// directly before selecting a glue-less load, so hide this // directly before selecting a glue-less load, so hide this
@ -182,6 +190,7 @@ def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_XOR, SIbuffer_atomic_xor>;
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_INC, SIbuffer_atomic_inc>; def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_INC, SIbuffer_atomic_inc>;
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_DEC, SIbuffer_atomic_dec>; def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_DEC, SIbuffer_atomic_dec>;
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_CMPSWAP, SIbuffer_atomic_cmpswap>; def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_CMPSWAP, SIbuffer_atomic_cmpswap>;
// Declare G_AMDGPU_S_BUFFER_LOAD as the GlobalISel equivalent of SIsbuffer_load.
def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD, SIsbuffer_load>;
class GISelSop2Pat < class GISelSop2Pat <
SDPatternOperator node, SDPatternOperator node,

View File

@ -2859,6 +2859,45 @@ AMDGPUInstructionSelector::selectMUBUFOffsetAtomic(MachineOperand &Root) const {
}}; }};
} }
/// Fetch the constant defining \p Reg as a zero-extended 32-bit immediate.
///
/// \returns the low 32 bits of the constant (widened to 64 bits), or None if
/// getConstantVRegVal finds no constant for \p Reg or the value it reports
/// does not fit in a signed 32-bit integer.
static Optional<uint64_t> getConstantZext32Val(Register Reg,
                                               const MachineRegisterInfo &MRI) {
  // getConstantVRegVal reports a sign-extended value. Accept anything that is
  // representable as a signed 32-bit integer and keep only its low half so the
  // caller sees the zero-extended form.
  const Optional<int64_t> SExtVal = getConstantVRegVal(Reg, MRI);
  if (SExtVal && isInt<32>(*SExtVal))
    return Lo_32(*SExtVal);

  return None;
}
/// Match \p Root as a constant offset that AMDGPU::getSMRDEncodedOffset can
/// encode for the current subtarget.
///
/// \returns a single renderer that adds the encoded immediate, or an empty
/// result when \p Root is not a suitable 32-bit constant or the offset has no
/// encoding.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferImm(MachineOperand &Root) const {
  if (Optional<uint64_t> Offset = getConstantZext32Val(Root.getReg(), *MRI)) {
    if (Optional<int64_t> Encoded =
            AMDGPU::getSMRDEncodedOffset(STI, *Offset)) {
      // Capture the plain integer so the renderer is self-contained.
      const int64_t Imm = *Encoded;
      return {{ [Imm](MachineInstrBuilder &MIB) { MIB.addImm(Imm); } }};
    }
  }

  // Either not a usable constant, or not encodable as an immediate.
  return {};
}
/// Match \p Root as a constant offset that
/// AMDGPU::getSMRDEncodedLiteralOffset32 can encode. Asserts that the
/// subtarget generation is SEA_ISLANDS.
///
/// \returns a single renderer that adds the encoded immediate, or an empty
/// result when \p Root is not a suitable 32-bit constant or the offset has no
/// encoding.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferImm32(MachineOperand &Root) const {
  assert(STI.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);

  if (Optional<uint64_t> Offset = getConstantZext32Val(Root.getReg(), *MRI)) {
    if (Optional<int64_t> Encoded =
            AMDGPU::getSMRDEncodedLiteralOffset32(STI, *Offset)) {
      // Capture the plain integer so the renderer is self-contained.
      const int64_t Imm = *Encoded;
      return {{ [Imm](MachineInstrBuilder &MIB) { MIB.addImm(Imm); } }};
    }
  }

  // Either not a usable constant, or not encodable as a 32-bit literal.
  return {};
}
void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB, void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
const MachineInstr &MI, const MachineInstr &MI,
int OpIdx) const { int OpIdx) const {

View File

@ -222,6 +222,9 @@ private:
InstructionSelector::ComplexRendererFns InstructionSelector::ComplexRendererFns
selectMUBUFAddr64Atomic(MachineOperand &Root) const; selectMUBUFAddr64Atomic(MachineOperand &Root) const;
// Complex-operand matcher named by the gi_smrd_buffer_imm TableGen definition.
ComplexRendererFns selectSMRDBufferImm(MachineOperand &Root) const;
// Complex-operand matcher named by the gi_smrd_buffer_imm32 TableGen definition.
ComplexRendererFns selectSMRDBufferImm32(MachineOperand &Root) const;
void renderTruncImm32(MachineInstrBuilder &MIB, const MachineInstr &MI, void renderTruncImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx = -1) const; int OpIdx = -1) const;

View File

@ -769,7 +769,7 @@ multiclass SMLoad_Pattern <string Instr, ValueType vt> {
// 1. Offset as an immediate // 1. Offset as an immediate
def : GCNPat < def : GCNPat <
(SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), timm:$cachepolicy), (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), timm:$cachepolicy),
(vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, (extract_glc $cachepolicy), (vt (!cast<SM_Pseudo>(Instr#"_IMM") SReg_128:$sbase, i32imm:$offset, (extract_glc $cachepolicy),
(extract_dlc $cachepolicy)))> { (extract_dlc $cachepolicy)))> {
let AddedComplexity = 2; let AddedComplexity = 2;
} }
@ -777,7 +777,8 @@ multiclass SMLoad_Pattern <string Instr, ValueType vt> {
// 2. 32-bit IMM offset on CI // 2. 32-bit IMM offset on CI
def : GCNPat < def : GCNPat <
(vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), timm:$cachepolicy)), (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), timm:$cachepolicy)),
(!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, (extract_glc $cachepolicy), (extract_dlc $cachepolicy))> { (!cast<InstSI>(Instr#"_IMM_ci") SReg_128:$sbase, smrd_literal_offset:$offset,
(extract_glc $cachepolicy), (extract_dlc $cachepolicy))> {
let OtherPredicates = [isGFX7Only]; let OtherPredicates = [isGFX7Only];
let AddedComplexity = 1; let AddedComplexity = 1;
} }
@ -785,7 +786,7 @@ multiclass SMLoad_Pattern <string Instr, ValueType vt> {
// 3. Offset loaded in an 32bit SGPR // 3. Offset loaded in an 32bit SGPR
def : GCNPat < def : GCNPat <
(SIsbuffer_load v4i32:$sbase, i32:$offset, timm:$cachepolicy), (SIsbuffer_load v4i32:$sbase, i32:$offset, timm:$cachepolicy),
(vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, (extract_glc $cachepolicy), (vt (!cast<SM_Pseudo>(Instr#"_SGPR") SReg_128:$sbase, SReg_32:$offset, (extract_glc $cachepolicy),
(extract_dlc $cachepolicy))) (extract_dlc $cachepolicy)))
>; >;
} }

File diff suppressed because it is too large Load Diff