mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
AMDGPU: Change internal tracking of wave size
Store the log2 wave size instead of forcing division and log2 operations when querying either.
This commit is contained in:
parent
76c68e0c0b
commit
3cd292c66e
@ -29,16 +29,16 @@ def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>;
|
||||
def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>;
|
||||
def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>;
|
||||
|
||||
class SubtargetFeatureWavefrontSize <int Value> : SubtargetFeature<
|
||||
"wavefrontsize"#Value,
|
||||
"WavefrontSize",
|
||||
!cast<string>(Value),
|
||||
class SubtargetFeatureWavefrontSize <int ValueLog2> : SubtargetFeature<
|
||||
"wavefrontsize"#!shl(1, ValueLog2),
|
||||
"WavefrontSizeLog2",
|
||||
!cast<string>(ValueLog2),
|
||||
"The number of threads per wavefront"
|
||||
>;
|
||||
|
||||
def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
|
||||
def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
|
||||
def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;
|
||||
def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<4>;
|
||||
def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<5>;
|
||||
def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<6>;
|
||||
|
||||
class SubtargetFeatureGeneration <string Value, string FeatureName,
|
||||
string Subtarget,
|
||||
|
@ -126,8 +126,8 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
|
||||
}
|
||||
|
||||
// Don't crash on invalid devices.
|
||||
if (WavefrontSize == 0)
|
||||
WavefrontSize = 64;
|
||||
if (WavefrontSizeLog2 == 0)
|
||||
WavefrontSizeLog2 = 5;
|
||||
|
||||
HasFminFmaxLegacy = getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
|
||||
|
||||
@ -163,7 +163,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) :
|
||||
HasTrigReducedRange(false),
|
||||
MaxWavesPerEU(10),
|
||||
LocalMemorySize(0),
|
||||
WavefrontSize(0)
|
||||
WavefrontSizeLog2(0)
|
||||
{ }
|
||||
|
||||
GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||
|
@ -77,7 +77,7 @@ protected:
|
||||
bool HasTrigReducedRange;
|
||||
unsigned MaxWavesPerEU;
|
||||
int LocalMemorySize;
|
||||
unsigned WavefrontSize;
|
||||
char WavefrontSizeLog2;
|
||||
|
||||
public:
|
||||
AMDGPUSubtarget(const Triple &TT);
|
||||
@ -181,7 +181,11 @@ public:
|
||||
}
|
||||
|
||||
unsigned getWavefrontSize() const {
|
||||
return WavefrontSize;
|
||||
return 1 << WavefrontSizeLog2;
|
||||
}
|
||||
|
||||
unsigned getWavefrontSizeLog2() const {
|
||||
return WavefrontSizeLog2;
|
||||
}
|
||||
|
||||
int getLocalMemorySize() const {
|
||||
@ -237,7 +241,7 @@ public:
|
||||
/// \returns Corresponding DWARF register number mapping flavour for the
|
||||
/// \p WavefrontSize.
|
||||
AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const {
|
||||
return WavefrontSize == 32 ? AMDGPUDwarfFlavour::Wave32
|
||||
return getWavefrontSize() == 32 ? AMDGPUDwarfFlavour::Wave32
|
||||
: AMDGPUDwarfFlavour::Wave64;
|
||||
}
|
||||
|
||||
@ -454,10 +458,6 @@ public:
|
||||
return (Generation)Gen;
|
||||
}
|
||||
|
||||
unsigned getWavefrontSizeLog2() const {
|
||||
return Log2_32(WavefrontSize);
|
||||
}
|
||||
|
||||
/// Return the number of high bits known to be zero for a frame index.
|
||||
unsigned getKnownHighZeroBitsForFrameIndex() const {
|
||||
return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2();
|
||||
@ -1160,7 +1160,7 @@ public:
|
||||
const override;
|
||||
|
||||
bool isWave32() const {
|
||||
return WavefrontSize == 32;
|
||||
return getWavefrontSize() == 32;
|
||||
}
|
||||
|
||||
const TargetRegisterClass *getBoolRC() const {
|
||||
|
Loading…
x
Reference in New Issue
Block a user