mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
AMDGPU/GlobalISel: Select llvm.amdgcn.s.buffer.load
Doesn't try to fail on the dlc bit pre-gfx10 like the DAG lowering does.
This commit is contained in:
parent
49633b5f44
commit
2a8cc659dc
@ -108,6 +108,14 @@ def gi_mubuf_offset_atomic :
|
||||
GIComplexOperandMatcher<s64, "selectMUBUFOffsetAtomic">,
|
||||
GIComplexPatternEquiv<MUBUFOffsetAtomic>;
|
||||
|
||||
// GlobalISel equivalent of the SMRDBufferImm complex pattern. Matching is
// delegated to the instruction selector method named selectSMRDBufferImm.
def gi_smrd_buffer_imm :
|
||||
GIComplexOperandMatcher<s64, "selectSMRDBufferImm">,
|
||||
GIComplexPatternEquiv<SMRDBufferImm>;
|
||||
|
||||
// GlobalISel equivalent of the SMRDBufferImm32 complex pattern. Matching is
// delegated to the instruction selector method named selectSMRDBufferImm32.
def gi_smrd_buffer_imm32 :
|
||||
GIComplexOperandMatcher<s64, "selectSMRDBufferImm32">,
|
||||
GIComplexPatternEquiv<SMRDBufferImm32>;
|
||||
|
||||
// Separate load nodes are defined to glue m0 initialization in
|
||||
// SelectionDAG. The GISel selector can just insert m0 initialization
|
||||
// directly before selecting a glue-less load, so hide this
|
||||
@ -182,6 +190,7 @@ def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_XOR, SIbuffer_atomic_xor>;
|
||||
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_INC, SIbuffer_atomic_inc>;
|
||||
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_DEC, SIbuffer_atomic_dec>;
|
||||
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_CMPSWAP, SIbuffer_atomic_cmpswap>;
|
||||
def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD, SIsbuffer_load>;
|
||||
|
||||
class GISelSop2Pat <
|
||||
SDPatternOperator node,
|
||||
|
@ -2859,6 +2859,45 @@ AMDGPUInstructionSelector::selectMUBUFOffsetAtomic(MachineOperand &Root) const {
|
||||
}};
|
||||
}
|
||||
|
||||
/// Get an immediate that must be 32-bits, and treated as zero extended.
///
/// Looks up the constant value of \p Reg through getConstantVRegVal. Returns
/// None when no constant is found or when the value fails the isInt<32>
/// check; otherwise returns Lo_32 of the value, carried in a uint64_t.
|
||||
static Optional<uint64_t> getConstantZext32Val(Register Reg,
|
||||
const MachineRegisterInfo &MRI) {
|
||||
// getConstantVRegVal sexts any values, so see if that matters.
|
||||
Optional<int64_t> OffsetVal = getConstantVRegVal(Reg, MRI);
|
||||
// Bail out if there is no constant, or if the (sign-extended) value does not
// pass the isInt<32> range check.
if (!OffsetVal || !isInt<32>(*OffsetVal))
|
||||
return None;
|
||||
// NOTE(review): presumably Lo_32 keeps only the low 32 bits, which drops the
// sign extension mentioned above -- confirm against the Lo_32 definition.
return Lo_32(*OffsetVal);
|
||||
}
|
||||
|
||||
/// Match \p Root as a constant offset and render it as an encoded SMRD
/// immediate.
///
/// Returns an empty ComplexRendererFns when getConstantZext32Val finds no
/// usable constant for the root register, or when
/// AMDGPU::getSMRDEncodedOffset yields no encoding for that value on the
/// subtarget STI. Otherwise returns a single renderer that adds the encoded
/// value as an immediate operand.
InstructionSelector::ComplexRendererFns
|
||||
AMDGPUInstructionSelector::selectSMRDBufferImm(MachineOperand &Root) const {
|
||||
// The offset must be a known constant that passes the 32-bit check.
Optional<uint64_t> OffsetVal = getConstantZext32Val(Root.getReg(), *MRI);
|
||||
if (!OffsetVal)
|
||||
return {};
|
||||
|
||||
// Convert the raw offset into the form the instruction encodes; no value
// means this offset cannot be used here.
Optional<int64_t> EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, *OffsetVal);
|
||||
if (!EncodedImm)
|
||||
return {};
|
||||
|
||||
// Capture by copy so the renderer does not refer to this function's locals.
return {{ [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); } }};
|
||||
}
|
||||
|
||||
/// Match \p Root as a constant offset and render it as an encoded 32-bit
/// literal SMRD offset.
///
/// Asserts that the subtarget generation is SEA_ISLANDS. Returns an empty
/// ComplexRendererFns when getConstantZext32Val finds no usable constant for
/// the root register, or when AMDGPU::getSMRDEncodedLiteralOffset32 yields no
/// encoding for that value. Otherwise returns a single renderer that adds the
/// encoded value as an immediate operand.
InstructionSelector::ComplexRendererFns
|
||||
AMDGPUInstructionSelector::selectSMRDBufferImm32(MachineOperand &Root) const {
|
||||
// This matcher is only expected to run for the SEA_ISLANDS generation.
assert(STI.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
|
||||
|
||||
// The offset must be a known constant that passes the 32-bit check.
Optional<uint64_t> OffsetVal = getConstantZext32Val(Root.getReg(), *MRI);
|
||||
if (!OffsetVal)
|
||||
return {};
|
||||
|
||||
// Convert the raw offset into its literal-offset encoding; no value means
// this offset cannot be used here.
Optional<int64_t> EncodedImm
|
||||
= AMDGPU::getSMRDEncodedLiteralOffset32(STI, *OffsetVal);
|
||||
if (!EncodedImm)
|
||||
return {};
|
||||
|
||||
// Capture by copy so the renderer does not refer to this function's locals.
return {{ [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); } }};
|
||||
}
|
||||
|
||||
void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
|
||||
const MachineInstr &MI,
|
||||
int OpIdx) const {
|
||||
|
@ -222,6 +222,9 @@ private:
|
||||
InstructionSelector::ComplexRendererFns
|
||||
selectMUBUFAddr64Atomic(MachineOperand &Root) const;
|
||||
|
||||
ComplexRendererFns selectSMRDBufferImm(MachineOperand &Root) const;
|
||||
ComplexRendererFns selectSMRDBufferImm32(MachineOperand &Root) const;
|
||||
|
||||
void renderTruncImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
|
||||
int OpIdx = -1) const;
|
||||
|
||||
|
@ -769,7 +769,7 @@ multiclass SMLoad_Pattern <string Instr, ValueType vt> {
|
||||
// 1. Offset as an immediate
|
||||
def : GCNPat <
|
||||
(SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), timm:$cachepolicy),
|
||||
(vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, (extract_glc $cachepolicy),
|
||||
(vt (!cast<SM_Pseudo>(Instr#"_IMM") SReg_128:$sbase, i32imm:$offset, (extract_glc $cachepolicy),
|
||||
(extract_dlc $cachepolicy)))> {
|
||||
let AddedComplexity = 2;
|
||||
}
|
||||
@ -777,7 +777,8 @@ multiclass SMLoad_Pattern <string Instr, ValueType vt> {
|
||||
// 2. 32-bit IMM offset on CI
|
||||
def : GCNPat <
|
||||
(vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), timm:$cachepolicy)),
|
||||
(!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, (extract_glc $cachepolicy), (extract_dlc $cachepolicy))> {
|
||||
(!cast<InstSI>(Instr#"_IMM_ci") SReg_128:$sbase, smrd_literal_offset:$offset,
|
||||
(extract_glc $cachepolicy), (extract_dlc $cachepolicy))> {
|
||||
let OtherPredicates = [isGFX7Only];
|
||||
let AddedComplexity = 1;
|
||||
}
|
||||
@ -785,7 +786,7 @@ multiclass SMLoad_Pattern <string Instr, ValueType vt> {
|
||||
// 3. Offset loaded in an 32bit SGPR
|
||||
def : GCNPat <
|
||||
(SIsbuffer_load v4i32:$sbase, i32:$offset, timm:$cachepolicy),
|
||||
(vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, (extract_glc $cachepolicy),
|
||||
(vt (!cast<SM_Pseudo>(Instr#"_SGPR") SReg_128:$sbase, SReg_32:$offset, (extract_glc $cachepolicy),
|
||||
(extract_dlc $cachepolicy)))
|
||||
>;
|
||||
}
|
||||
|
4281
test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
Normal file
4281
test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user