mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
AMDGPU/GlobalISel: Select llvm.amdgcn.s.buffer.load
Doesn't try to fail on the dlc bit pre-gfx10 like the DAG lowering does.
This commit is contained in:
parent
49633b5f44
commit
2a8cc659dc
@ -108,6 +108,14 @@ def gi_mubuf_offset_atomic :
|
|||||||
GIComplexOperandMatcher<s64, "selectMUBUFOffsetAtomic">,
|
GIComplexOperandMatcher<s64, "selectMUBUFOffsetAtomic">,
|
||||||
GIComplexPatternEquiv<MUBUFOffsetAtomic>;
|
GIComplexPatternEquiv<MUBUFOffsetAtomic>;
|
||||||
|
|
||||||
|
def gi_smrd_buffer_imm :
|
||||||
|
GIComplexOperandMatcher<s64, "selectSMRDBufferImm">,
|
||||||
|
GIComplexPatternEquiv<SMRDBufferImm>;
|
||||||
|
|
||||||
|
def gi_smrd_buffer_imm32 :
|
||||||
|
GIComplexOperandMatcher<s64, "selectSMRDBufferImm32">,
|
||||||
|
GIComplexPatternEquiv<SMRDBufferImm32>;
|
||||||
|
|
||||||
// Separate load nodes are defined to glue m0 initialization in
|
// Separate load nodes are defined to glue m0 initialization in
|
||||||
// SelectionDAG. The GISel selector can just insert m0 initialization
|
// SelectionDAG. The GISel selector can just insert m0 initialization
|
||||||
// directly before selecting a glue-less load, so hide this
|
// directly before selecting a glue-less load, so hide this
|
||||||
@ -182,6 +190,7 @@ def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_XOR, SIbuffer_atomic_xor>;
|
|||||||
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_INC, SIbuffer_atomic_inc>;
|
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_INC, SIbuffer_atomic_inc>;
|
||||||
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_DEC, SIbuffer_atomic_dec>;
|
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_DEC, SIbuffer_atomic_dec>;
|
||||||
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_CMPSWAP, SIbuffer_atomic_cmpswap>;
|
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_CMPSWAP, SIbuffer_atomic_cmpswap>;
|
||||||
|
def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD, SIsbuffer_load>;
|
||||||
|
|
||||||
class GISelSop2Pat <
|
class GISelSop2Pat <
|
||||||
SDPatternOperator node,
|
SDPatternOperator node,
|
||||||
|
@ -2859,6 +2859,45 @@ AMDGPUInstructionSelector::selectMUBUFOffsetAtomic(MachineOperand &Root) const {
|
|||||||
}};
|
}};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Get an immediate that must be 32-bits, and treated as zero extended.
|
||||||
|
static Optional<uint64_t> getConstantZext32Val(Register Reg,
|
||||||
|
const MachineRegisterInfo &MRI) {
|
||||||
|
// getConstantVRegVal sexts any values, so see if that matters.
|
||||||
|
Optional<int64_t> OffsetVal = getConstantVRegVal(Reg, MRI);
|
||||||
|
if (!OffsetVal || !isInt<32>(*OffsetVal))
|
||||||
|
return None;
|
||||||
|
return Lo_32(*OffsetVal);
|
||||||
|
}
|
||||||
|
|
||||||
|
InstructionSelector::ComplexRendererFns
|
||||||
|
AMDGPUInstructionSelector::selectSMRDBufferImm(MachineOperand &Root) const {
|
||||||
|
Optional<uint64_t> OffsetVal = getConstantZext32Val(Root.getReg(), *MRI);
|
||||||
|
if (!OffsetVal)
|
||||||
|
return {};
|
||||||
|
|
||||||
|
Optional<int64_t> EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, *OffsetVal);
|
||||||
|
if (!EncodedImm)
|
||||||
|
return {};
|
||||||
|
|
||||||
|
return {{ [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); } }};
|
||||||
|
}
|
||||||
|
|
||||||
|
InstructionSelector::ComplexRendererFns
|
||||||
|
AMDGPUInstructionSelector::selectSMRDBufferImm32(MachineOperand &Root) const {
|
||||||
|
assert(STI.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
|
||||||
|
|
||||||
|
Optional<uint64_t> OffsetVal = getConstantZext32Val(Root.getReg(), *MRI);
|
||||||
|
if (!OffsetVal)
|
||||||
|
return {};
|
||||||
|
|
||||||
|
Optional<int64_t> EncodedImm
|
||||||
|
= AMDGPU::getSMRDEncodedLiteralOffset32(STI, *OffsetVal);
|
||||||
|
if (!EncodedImm)
|
||||||
|
return {};
|
||||||
|
|
||||||
|
return {{ [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); } }};
|
||||||
|
}
|
||||||
|
|
||||||
void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
|
void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
|
||||||
const MachineInstr &MI,
|
const MachineInstr &MI,
|
||||||
int OpIdx) const {
|
int OpIdx) const {
|
||||||
|
@ -222,6 +222,9 @@ private:
|
|||||||
InstructionSelector::ComplexRendererFns
|
InstructionSelector::ComplexRendererFns
|
||||||
selectMUBUFAddr64Atomic(MachineOperand &Root) const;
|
selectMUBUFAddr64Atomic(MachineOperand &Root) const;
|
||||||
|
|
||||||
|
ComplexRendererFns selectSMRDBufferImm(MachineOperand &Root) const;
|
||||||
|
ComplexRendererFns selectSMRDBufferImm32(MachineOperand &Root) const;
|
||||||
|
|
||||||
void renderTruncImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
|
void renderTruncImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
|
||||||
int OpIdx = -1) const;
|
int OpIdx = -1) const;
|
||||||
|
|
||||||
|
@ -769,7 +769,7 @@ multiclass SMLoad_Pattern <string Instr, ValueType vt> {
|
|||||||
// 1. Offset as an immediate
|
// 1. Offset as an immediate
|
||||||
def : GCNPat <
|
def : GCNPat <
|
||||||
(SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), timm:$cachepolicy),
|
(SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), timm:$cachepolicy),
|
||||||
(vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, (extract_glc $cachepolicy),
|
(vt (!cast<SM_Pseudo>(Instr#"_IMM") SReg_128:$sbase, i32imm:$offset, (extract_glc $cachepolicy),
|
||||||
(extract_dlc $cachepolicy)))> {
|
(extract_dlc $cachepolicy)))> {
|
||||||
let AddedComplexity = 2;
|
let AddedComplexity = 2;
|
||||||
}
|
}
|
||||||
@ -777,7 +777,8 @@ multiclass SMLoad_Pattern <string Instr, ValueType vt> {
|
|||||||
// 2. 32-bit IMM offset on CI
|
// 2. 32-bit IMM offset on CI
|
||||||
def : GCNPat <
|
def : GCNPat <
|
||||||
(vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), timm:$cachepolicy)),
|
(vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), timm:$cachepolicy)),
|
||||||
(!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, (extract_glc $cachepolicy), (extract_dlc $cachepolicy))> {
|
(!cast<InstSI>(Instr#"_IMM_ci") SReg_128:$sbase, smrd_literal_offset:$offset,
|
||||||
|
(extract_glc $cachepolicy), (extract_dlc $cachepolicy))> {
|
||||||
let OtherPredicates = [isGFX7Only];
|
let OtherPredicates = [isGFX7Only];
|
||||||
let AddedComplexity = 1;
|
let AddedComplexity = 1;
|
||||||
}
|
}
|
||||||
@ -785,7 +786,7 @@ multiclass SMLoad_Pattern <string Instr, ValueType vt> {
|
|||||||
// 3. Offset loaded in a 32-bit SGPR
|
// 3. Offset loaded in a 32-bit SGPR
|
||||||
def : GCNPat <
|
def : GCNPat <
|
||||||
(SIsbuffer_load v4i32:$sbase, i32:$offset, timm:$cachepolicy),
|
(SIsbuffer_load v4i32:$sbase, i32:$offset, timm:$cachepolicy),
|
||||||
(vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, (extract_glc $cachepolicy),
|
(vt (!cast<SM_Pseudo>(Instr#"_SGPR") SReg_128:$sbase, SReg_32:$offset, (extract_glc $cachepolicy),
|
||||||
(extract_dlc $cachepolicy)))
|
(extract_dlc $cachepolicy)))
|
||||||
>;
|
>;
|
||||||
}
|
}
|
||||||
|
4281
test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
Normal file
4281
test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user