mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
AMDGPU: Set element_size in private resource descriptor
Introduce a subtarget feature for this, and leave the default with the current behavior which assumes up to 16-byte loads/stores can be used. The field also seems to have the ability to be set to 2 bytes, but I'm not sure what that would be used for. llvm-svn: 260651
This commit is contained in:
parent
cf66bc968c
commit
628b2818b6
@ -175,6 +175,18 @@ def FeatureFPExceptions : SubtargetFeature<"fp-exceptions",
|
||||
"Enable floating point exceptions"
|
||||
>;
|
||||
|
||||
// Subtarget feature named "max-private-element-size-<size>". Its attribute is
// MaxPrivateElementSize and its value is <size> rendered as a string.
class FeatureMaxPrivateElementSize<int size> : SubtargetFeature<
  "max-private-element-size-"#size, "MaxPrivateElementSize",
  !cast<string>(size), "Maximum private access size may be "#size>;

// Instantiations for element sizes 4, 8 and 16.
def FeatureMaxPrivateElementSize4 : FeatureMaxPrivateElementSize<4>;
def FeatureMaxPrivateElementSize8 : FeatureMaxPrivateElementSize<8>;
def FeatureMaxPrivateElementSize16 : FeatureMaxPrivateElementSize<16>;
|
||||
|
||||
|
||||
def FeatureEnableHugeScratchBuffer : SubtargetFeature<
|
||||
"huge-scratch-buffer",
|
||||
"EnableHugeScratchBuffer",
|
||||
|
@ -593,6 +593,20 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
|
||||
}
|
||||
}
|
||||
|
||||
// This is supposed to be log2(Size)
|
||||
static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) {
|
||||
switch (Size) {
|
||||
case 4:
|
||||
return AMD_ELEMENT_4_BYTES;
|
||||
case 8:
|
||||
return AMD_ELEMENT_8_BYTES;
|
||||
case 16:
|
||||
return AMD_ELEMENT_16_BYTES;
|
||||
default:
|
||||
llvm_unreachable("invalid private_element_size");
|
||||
}
|
||||
}
|
||||
|
||||
void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
|
||||
const SIProgramInfo &KernelInfo) const {
|
||||
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
@ -606,6 +620,11 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
|
||||
(KernelInfo.ComputePGMRSrc2 << 32);
|
||||
header.code_properties = AMD_CODE_PROPERTY_IS_PTR64;
|
||||
|
||||
|
||||
AMD_HSA_BITS_SET(header.code_properties,
|
||||
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
|
||||
getElementByteSizeValue(STM.getMaxPrivateElementSize()));
|
||||
|
||||
if (MFI->hasPrivateSegmentBuffer()) {
|
||||
header.code_properties |=
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
|
||||
|
@ -58,6 +58,11 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
|
||||
FP32Denormals = false;
|
||||
FP64Denormals = false;
|
||||
}
|
||||
|
||||
// Set defaults if needed.
|
||||
if (MaxPrivateElementSize == 0)
|
||||
MaxPrivateElementSize = 16;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
@ -74,7 +79,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||
EnableUnsafeDSOffsetFolding(false),
|
||||
EnableXNACK(false),
|
||||
WavefrontSize(0), CFALUBug(false),
|
||||
LocalMemorySize(0),
|
||||
LocalMemorySize(0), MaxPrivateElementSize(0),
|
||||
EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false),
|
||||
GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0),
|
||||
IsaVersion(ISAVersion0_0_0), EnableHugeScratchBuffer(false),
|
||||
|
@ -81,6 +81,7 @@ private:
|
||||
unsigned WavefrontSize;
|
||||
bool CFALUBug;
|
||||
int LocalMemorySize;
|
||||
unsigned MaxPrivateElementSize;
|
||||
bool EnableVGPRSpilling;
|
||||
bool SGPRInitBug;
|
||||
bool IsGCN;
|
||||
@ -253,6 +254,10 @@ public:
|
||||
return LocalMemorySize;
|
||||
}
|
||||
|
||||
unsigned getMaxPrivateElementSize() const {
|
||||
return MaxPrivateElementSize;
|
||||
}
|
||||
|
||||
bool hasSGPRInitBug() const {
|
||||
return SGPRInitBug;
|
||||
}
|
||||
|
@ -44,6 +44,15 @@ enum amd_code_version_t {
|
||||
AMD_CODE_VERSION_MINOR = 1
|
||||
};
|
||||
|
||||
// Sets val bits for specified mask in specified dst packed instance.
//
// `mask` must be the bare name of a mask constant that has a companion
// `<mask>_SHIFT` constant; the shift name is formed by token pasting.
// The two updates are wrapped in do { } while (0) so the macro expands to a
// single statement and stays correct under an unbraced if/else or loop.
#define AMD_HSA_BITS_SET(dst, mask, val)                                       \
  do {                                                                         \
    /* Clear the field (and the bit at the shift position). */                 \
    (dst) &= (~(1 << mask ## _SHIFT) & ~(mask));                               \
    /* Insert the new value, truncated to the field width. */                  \
    (dst) |= (((val) << mask ## _SHIFT) & (mask));                             \
  } while (0)
|
||||
|
||||
// Gets bits for specified mask from specified src packed instance.
//
// `mask` must be the bare name of a mask constant that has a companion
// `<mask>_SHIFT` constant; the shift name is formed by token pasting.
// Both arguments are parenthesized so that compound expressions such as
// `a | b` are masked as a whole instead of being split by operator precedence.
#define AMD_HSA_BITS_GET(src, mask)                                            \
  (((src) & (mask)) >> mask ## _SHIFT)
|
||||
|
||||
/// The values used to define the number of bytes to use for the
|
||||
/// swizzle element size.
|
||||
enum amd_element_byte_size_t {
|
||||
|
@ -3059,6 +3059,10 @@ uint64_t SIInstrInfo::getScratchRsrcWords23() const {
|
||||
AMDGPU::RSRC_TID_ENABLE |
|
||||
0xffffffff; // Size;
|
||||
|
||||
uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;
|
||||
|
||||
Rsrc23 |= (EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT);
|
||||
|
||||
// If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
|
||||
// Clear them unless we want a huge stride.
|
||||
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
|
||||
|
@ -489,7 +489,7 @@ namespace AMDGPU {
|
||||
|
||||
const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
|
||||
const uint64_t RSRC_TID_ENABLE = 1LL << 55;
|
||||
|
||||
const uint64_t RSRC_ELEMENT_SIZE_SHIFT = 51;
|
||||
} // End namespace AMDGPU
|
||||
|
||||
namespace SI {
|
||||
|
@ -10,8 +10,8 @@
|
||||
; GCN: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
|
||||
; GCN: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
|
||||
; GCN: s_mov_b32 s10, -1
|
||||
; CI: s_mov_b32 s11, 0x80f000
|
||||
; VI: s_mov_b32 s11, 0x800000
|
||||
; CI: s_mov_b32 s11, 0x98f000
|
||||
; VI: s_mov_b32 s11, 0x980000
|
||||
|
||||
|
||||
; GCNHSA: .amd_kernel_code_t
|
||||
|
@ -5,8 +5,8 @@
|
||||
; GCN: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
|
||||
; GCN: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
|
||||
; GCN: s_mov_b32 s10, -1
|
||||
; CI: s_mov_b32 s11, 0x80f000
|
||||
; VI: s_mov_b32 s11, 0x800000
|
||||
; CI: s_mov_b32 s11, 0x98f000
|
||||
; VI: s_mov_b32 s11, 0x980000
|
||||
|
||||
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen
|
||||
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen
|
||||
@ -26,8 +26,8 @@ define void @large_alloca_pixel_shader(i32 %x, i32 %y) #1 {
|
||||
; GCN: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
|
||||
; GCN: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
|
||||
; GCN: s_mov_b32 s10, -1
|
||||
; CI: s_mov_b32 s11, 0x80f000
|
||||
; VI: s_mov_b32 s11, 0x800000
|
||||
; CI: s_mov_b32 s11, 0x98f000
|
||||
; VI: s_mov_b32 s11, 0x980000
|
||||
|
||||
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen
|
||||
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen
|
||||
|
@ -17,8 +17,8 @@
|
||||
; GCN: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GCN-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
|
||||
; GCN-NEXT: s_mov_b32 s14, -1
|
||||
; SI-NEXT: s_mov_b32 s15, 0x80f000
|
||||
; VI-NEXT: s_mov_b32 s15, 0x800000
|
||||
; SI-NEXT: s_mov_b32 s15, 0x98f000
|
||||
; VI-NEXT: s_mov_b32 s15, 0x980000
|
||||
|
||||
|
||||
; GCN: buffer_store_dword {{v[0-9]+}}, s[12:15], s16 offset:{{[0-9]+}} ; 4-byte Folded Spill
|
||||
|
@ -14,8 +14,8 @@
|
||||
; GCN: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GCN-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
|
||||
; GCN-NEXT: s_mov_b32 s14, -1
|
||||
; SI-NEXT: s_mov_b32 s15, 0x80f000
|
||||
; VI-NEXT: s_mov_b32 s15, 0x800000
|
||||
; SI-NEXT: s_mov_b32 s15, 0x98f000
|
||||
; VI-NEXT: s_mov_b32 s15, 0x980000
|
||||
|
||||
; s11 is the offset SGPR (s[12:15] holds the scratch resource descriptor)
|
||||
; GCN: buffer_store_dword {{v[0-9]+}}, s[12:15], s11 offset:{{[0-9]+}} ; 4-byte Folded Spill
|
||||
|
Loading…
Reference in New Issue
Block a user