1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

[AMDGPU] Propagate amdgpu-waves-per-eu to callees

Differential Revision: https://reviews.llvm.org/D76868
This commit is contained in:
Stanislav Mekhanoshin 2020-03-25 12:57:44 -07:00
parent c9d270bf8d
commit f9ca869bd0
2 changed files with 91 additions and 34 deletions

View File

@ -48,19 +48,62 @@ extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
namespace {
// Target features to propagate.
// This set is used as a mask: only these bits take part in the comparison of
// two functions' properties and in the adjustment of a callee to its caller.
static constexpr const FeatureBitset TargetFeatures = {
AMDGPU::FeatureWavefrontSize16,
AMDGPU::FeatureWavefrontSize32,
AMDGPU::FeatureWavefrontSize64
};
// Attributes to propagate.
// Function attributes are looked up by these names; the index of a name in
// this table is also the index of its value in FnProperties::Attributes.
static constexpr const char* AttributeNames[] = {
"amdgpu-waves-per-eu"
};
// Number of entries in AttributeNames, computed at compile time so that it
// tracks the table automatically when a new name is added.
static constexpr unsigned NumAttr =
sizeof(AttributeNames) / sizeof(AttributeNames[0]);
class AMDGPUPropagateAttributes {
const FeatureBitset TargetFeatures = {
AMDGPU::FeatureWavefrontSize16,
AMDGPU::FeatureWavefrontSize32,
AMDGPU::FeatureWavefrontSize64
// Snapshot of the function properties this pass propagates: the subtarget
// feature bits of a function and the values of the attributes listed in
// AttributeNames (an unset Optional means the attribute is absent).
class FnProperties {
private:
  // Builds a property set from raw feature bits; all attributes start unset.
  // Only used internally by adjustToCaller().
  explicit FnProperties(const FeatureBitset &FB) : Features(FB) {}

public:
  // Captures the feature bits that \p TM reports for \p F together with every
  // attribute from AttributeNames that is present on \p F.
  explicit FnProperties(const TargetMachine &TM, const Function &F) {
    Features = TM.getSubtargetImpl(F)->getFeatureBits();
    for (unsigned I = 0; I < NumAttr; ++I)
      if (F.hasFnAttribute(AttributeNames[I]))
        Attributes[I] = F.getFnAttribute(AttributeNames[I]);
  }

  // Two property sets are equal when they agree on the propagated target
  // features (all other feature bits are ignored) and on every propagated
  // attribute.
  bool operator == (const FnProperties &Other) const {
    if ((Features & TargetFeatures) != (Other.Features & TargetFeatures))
      return false;
    for (unsigned I = 0; I < NumAttr; ++I)
      if (Attributes[I] != Other.Attributes[I])
        return false;
    return true;
  }

  // Returns the properties this function should have when called from a
  // function with \p CallerProps: the propagated target features and
  // attributes come from the caller, every other feature bit is kept from
  // this function.
  FnProperties adjustToCaller(const FnProperties &CallerProps) const {
    // Mask the caller's bits so that only the propagated target features are
    // taken from it; CallerProps.Features holds the caller's complete feature
    // set and must not leak unrelated features into the callee.
    FnProperties New((Features & ~TargetFeatures) |
                     (CallerProps.Features & TargetFeatures));
    for (unsigned I = 0; I < NumAttr; ++I)
      New.Attributes[I] = CallerProps.Attributes[I];
    return New;
  }

  // Full subtarget feature bits of the function.
  FeatureBitset Features;
  // Values of the propagated attributes, indexed like AttributeNames.
  Optional<Attribute> Attributes[NumAttr];
};
class Clone{
class Clone {
public:
Clone(FeatureBitset FeatureMask, Function *OrigF, Function *NewF) :
FeatureMask(FeatureMask), OrigF(OrigF), NewF(NewF) {}
Clone(const FnProperties &Props, Function *OrigF, Function *NewF) :
Properties(Props), OrigF(OrigF), NewF(NewF) {}
FeatureBitset FeatureMask;
FnProperties Properties;
Function *OrigF;
Function *NewF;
};
@ -77,17 +120,19 @@ class AMDGPUPropagateAttributes {
SmallVector<Clone, 32> Clones;
// Find a clone with required features.
Function *findFunction(const FeatureBitset &FeaturesNeeded,
Function *findFunction(const FnProperties &PropsNeeded,
Function *OrigF);
// Clone function F and set NewFeatures on the clone.
// Clone function \p F and set \p NewProps on the clone.
// Clone takes the name of original function.
Function *cloneWithFeatures(Function &F,
const FeatureBitset &NewFeatures);
Function *cloneWithProperties(Function &F, const FnProperties &NewProps);
// Set new function's features in place.
void setFeatures(Function &F, const FeatureBitset &NewFeatures);
// Set new function's attributes in place.
void setAttributes(Function &F, const ArrayRef<Optional<Attribute>> NewAttrs);
std::string getFeatureString(const FeatureBitset &Features) const;
// Propagate attributes from Roots.
@ -155,11 +200,11 @@ INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
false, false)
Function *
AMDGPUPropagateAttributes::findFunction(const FeatureBitset &FeaturesNeeded,
AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded,
Function *OrigF) {
// TODO: search for clone's clones.
for (Clone &C : Clones)
if (C.OrigF == OrigF && FeaturesNeeded == C.FeatureMask)
if (C.OrigF == OrigF && PropsNeeded == C.Properties)
return C.NewF;
return nullptr;
@ -195,8 +240,7 @@ bool AMDGPUPropagateAttributes::process() {
if (F.isDeclaration())
continue;
const FeatureBitset &CalleeBits =
TM->getSubtargetImpl(F)->getFeatureBits();
const FnProperties CalleeProps(*TM, F);
SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
SmallSet<CallBase *, 32> Visited;
@ -213,32 +257,31 @@ bool AMDGPUPropagateAttributes::process() {
if (!Roots.count(Caller) && !NewRoots.count(Caller))
continue;
const FeatureBitset &CallerBits =
TM->getSubtargetImpl(*Caller)->getFeatureBits() & TargetFeatures;
const FnProperties CallerProps(*TM, *Caller);
if (CallerBits == (CalleeBits & TargetFeatures)) {
if (CalleeProps == CallerProps) {
if (!Roots.count(&F))
NewRoots.insert(&F);
continue;
}
Function *NewF = findFunction(CallerBits, &F);
Function *NewF = findFunction(CallerProps, &F);
if (!NewF) {
FeatureBitset NewFeatures((CalleeBits & ~TargetFeatures) |
CallerBits);
const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
if (!AllowClone) {
// This may set different features on different iterations if
// there is a contradiction in callers' attributes. In this case
// we rely on a second pass running on Module, which is allowed
// to clone.
setFeatures(F, NewFeatures);
setFeatures(F, NewProps.Features);
setAttributes(F, NewProps.Attributes);
NewRoots.insert(&F);
Changed = true;
break;
}
NewF = cloneWithFeatures(F, NewFeatures);
Clones.push_back(Clone(CallerBits, &F, NewF));
NewF = cloneWithProperties(F, NewProps);
Clones.push_back(Clone(CallerProps, &F, NewF));
NewRoots.insert(NewF);
}
@ -267,13 +310,14 @@ bool AMDGPUPropagateAttributes::process() {
}
Function *
AMDGPUPropagateAttributes::cloneWithFeatures(Function &F,
const FeatureBitset &NewFeatures) {
AMDGPUPropagateAttributes::cloneWithProperties(Function &F,
const FnProperties &NewProps) {
LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');
ValueToValueMapTy dummy;
Function *NewF = CloneFunction(&F, dummy);
setFeatures(*NewF, NewFeatures);
setFeatures(*NewF, NewProps.Features);
setAttributes(*NewF, NewProps.Attributes);
NewF->setVisibility(GlobalValue::DefaultVisibility);
NewF->setLinkage(GlobalValue::InternalLinkage);
@ -300,6 +344,18 @@ void AMDGPUPropagateAttributes::setFeatures(Function &F,
F.addFnAttr("target-features", NewFeatureStr);
}
// Replace the propagated attributes of \p F with \p NewAttrs.
// \p NewAttrs is indexed like AttributeNames; for each name the existing
// attribute is removed from \p F and, when the corresponding entry holds a
// value, that attribute is added instead.
void AMDGPUPropagateAttributes::setAttributes(Function &F,
    const ArrayRef<Optional<Attribute>> NewAttrs) {
  LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n");
  for (unsigned Idx = 0; Idx != NumAttr; ++Idx) {
    // Always drop the current value first so an unset entry clears it.
    F.removeFnAttr(AttributeNames[Idx]);

    const Optional<Attribute> &Replacement = NewAttrs[Idx];
    if (!Replacement)
      continue;

    LLVM_DEBUG(dbgs() << '\t' << Replacement->getAsString() << '\n');
    F.addFnAttr(*Replacement);
  }
}
std::string
AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
{

View File

@ -55,11 +55,11 @@
; OPT-INT: define internal fastcc void @foo3() unnamed_addr #4
; OPT-EXT: define internal fastcc void @foo2.3() unnamed_addr #4
; OPT-INT: define internal fastcc void @foo2() unnamed_addr #4
; OPT: attributes #0 = { {{.*}} "target-features"="+wavefrontsize64" }
; OPT: attributes #0 = { {{.*}} "amdgpu-waves-per-eu"="1,1" "target-features"="+wavefrontsize64" }
; OPT: attributes #1 = { {{.*}} "target-features"="{{.*}},-wavefrontsize16,-wavefrontsize32,+wavefrontsize64{{.*}}" }
; OPT: attributes #2 = { {{.*}} "target-features"="+wavefrontsize32" }
; OPT: attributes #2 = { {{.*}} "amdgpu-waves-per-eu"="2,4" "target-features"="+wavefrontsize32" }
; OPT: attributes #3 = { {{.*}} "target-features"="+wavefrontsize64" }
; OPT: attributes #4 = { {{.*}} "target-features"="{{.*}},-wavefrontsize16,+wavefrontsize32,-wavefrontsize64{{.*}}" }
; OPT: attributes #4 = { {{.*}} "amdgpu-waves-per-eu"="2,4" "target-features"="{{.*}},-wavefrontsize16,+wavefrontsize32,-wavefrontsize64{{.*}}" }
; LLC: foo3:
; LLC: sample asm
@ -94,7 +94,7 @@ entry:
ret void
}
define void @foo3() #1 {
define void @foo3() #4 {
entry:
call void asm sideeffect "; sample asm", ""()
ret void
@ -135,7 +135,8 @@ entry:
ret void
}
attributes #0 = { nounwind "target-features"="+wavefrontsize32" }
attributes #1 = { noinline nounwind "target-features"="+wavefrontsize64" }
attributes #0 = { nounwind "target-features"="+wavefrontsize32" "amdgpu-waves-per-eu"="2,4" }
attributes #1 = { noinline nounwind "target-features"="+wavefrontsize64" "amdgpu-waves-per-eu"="1,1" }
attributes #2 = { nounwind "target-features"="+wavefrontsize64" }
attributes #3 = { nounwind "target-features"="+wavefrontsize64" }
attributes #4 = { noinline nounwind "target-features"="+wavefrontsize64" "amdgpu-waves-per-eu"="2,4" }