mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
[AMDGPU] Propagate amdgpu-waves-per-eu to callees
Differential Revision: https://reviews.llvm.org/D76868
This commit is contained in:
parent
c9d270bf8d
commit
f9ca869bd0
@ -48,19 +48,62 @@ extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
|
||||
|
||||
namespace {
|
||||
|
||||
// Target features to propagate.
|
||||
static constexpr const FeatureBitset TargetFeatures = {
|
||||
AMDGPU::FeatureWavefrontSize16,
|
||||
AMDGPU::FeatureWavefrontSize32,
|
||||
AMDGPU::FeatureWavefrontSize64
|
||||
};
|
||||
|
||||
// Attributes to propagate.
|
||||
static constexpr const char* AttributeNames[] = {
|
||||
"amdgpu-waves-per-eu"
|
||||
};
|
||||
|
||||
static constexpr unsigned NumAttr =
|
||||
sizeof(AttributeNames) / sizeof(AttributeNames[0]);
|
||||
|
||||
class AMDGPUPropagateAttributes {
|
||||
const FeatureBitset TargetFeatures = {
|
||||
AMDGPU::FeatureWavefrontSize16,
|
||||
AMDGPU::FeatureWavefrontSize32,
|
||||
AMDGPU::FeatureWavefrontSize64
|
||||
|
||||
class FnProperties {
|
||||
private:
|
||||
explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {}
|
||||
|
||||
public:
|
||||
explicit FnProperties(const TargetMachine &TM, const Function &F) {
|
||||
Features = TM.getSubtargetImpl(F)->getFeatureBits();
|
||||
|
||||
for (unsigned I = 0; I < NumAttr; ++I)
|
||||
if (F.hasFnAttribute(AttributeNames[I]))
|
||||
Attributes[I] = F.getFnAttribute(AttributeNames[I]);
|
||||
}
|
||||
|
||||
bool operator == (const FnProperties &Other) const {
|
||||
if ((Features & TargetFeatures) != (Other.Features & TargetFeatures))
|
||||
return false;
|
||||
for (unsigned I = 0; I < NumAttr; ++I)
|
||||
if (Attributes[I] != Other.Attributes[I])
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
FnProperties adjustToCaller(const FnProperties &CallerProps) const {
|
||||
FnProperties New((Features & ~TargetFeatures) | CallerProps.Features);
|
||||
for (unsigned I = 0; I < NumAttr; ++I)
|
||||
New.Attributes[I] = CallerProps.Attributes[I];
|
||||
return New;
|
||||
}
|
||||
|
||||
FeatureBitset Features;
|
||||
Optional<Attribute> Attributes[NumAttr];
|
||||
};
|
||||
|
||||
class Clone{
|
||||
class Clone {
|
||||
public:
|
||||
Clone(FeatureBitset FeatureMask, Function *OrigF, Function *NewF) :
|
||||
FeatureMask(FeatureMask), OrigF(OrigF), NewF(NewF) {}
|
||||
Clone(const FnProperties &Props, Function *OrigF, Function *NewF) :
|
||||
Properties(Props), OrigF(OrigF), NewF(NewF) {}
|
||||
|
||||
FeatureBitset FeatureMask;
|
||||
FnProperties Properties;
|
||||
Function *OrigF;
|
||||
Function *NewF;
|
||||
};
|
||||
@ -77,17 +120,19 @@ class AMDGPUPropagateAttributes {
|
||||
SmallVector<Clone, 32> Clones;
|
||||
|
||||
// Find a clone with required features.
|
||||
Function *findFunction(const FeatureBitset &FeaturesNeeded,
|
||||
Function *findFunction(const FnProperties &PropsNeeded,
|
||||
Function *OrigF);
|
||||
|
||||
// Clone function F and set NewFeatures on the clone.
|
||||
// Clone function \p F and set \p NewProps on the clone.
|
||||
// Cole takes the name of original function.
|
||||
Function *cloneWithFeatures(Function &F,
|
||||
const FeatureBitset &NewFeatures);
|
||||
Function *cloneWithProperties(Function &F, const FnProperties &NewProps);
|
||||
|
||||
// Set new function's features in place.
|
||||
void setFeatures(Function &F, const FeatureBitset &NewFeatures);
|
||||
|
||||
// Set new function's attributes in place.
|
||||
void setAttributes(Function &F, const ArrayRef<Optional<Attribute>> NewAttrs);
|
||||
|
||||
std::string getFeatureString(const FeatureBitset &Features) const;
|
||||
|
||||
// Propagate attributes from Roots.
|
||||
@ -155,11 +200,11 @@ INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
|
||||
false, false)
|
||||
|
||||
Function *
|
||||
AMDGPUPropagateAttributes::findFunction(const FeatureBitset &FeaturesNeeded,
|
||||
AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded,
|
||||
Function *OrigF) {
|
||||
// TODO: search for clone's clones.
|
||||
for (Clone &C : Clones)
|
||||
if (C.OrigF == OrigF && FeaturesNeeded == C.FeatureMask)
|
||||
if (C.OrigF == OrigF && PropsNeeded == C.Properties)
|
||||
return C.NewF;
|
||||
|
||||
return nullptr;
|
||||
@ -195,8 +240,7 @@ bool AMDGPUPropagateAttributes::process() {
|
||||
if (F.isDeclaration())
|
||||
continue;
|
||||
|
||||
const FeatureBitset &CalleeBits =
|
||||
TM->getSubtargetImpl(F)->getFeatureBits();
|
||||
const FnProperties CalleeProps(*TM, F);
|
||||
SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
|
||||
SmallSet<CallBase *, 32> Visited;
|
||||
|
||||
@ -213,32 +257,31 @@ bool AMDGPUPropagateAttributes::process() {
|
||||
if (!Roots.count(Caller) && !NewRoots.count(Caller))
|
||||
continue;
|
||||
|
||||
const FeatureBitset &CallerBits =
|
||||
TM->getSubtargetImpl(*Caller)->getFeatureBits() & TargetFeatures;
|
||||
const FnProperties CallerProps(*TM, *Caller);
|
||||
|
||||
if (CallerBits == (CalleeBits & TargetFeatures)) {
|
||||
if (CalleeProps == CallerProps) {
|
||||
if (!Roots.count(&F))
|
||||
NewRoots.insert(&F);
|
||||
continue;
|
||||
}
|
||||
|
||||
Function *NewF = findFunction(CallerBits, &F);
|
||||
Function *NewF = findFunction(CallerProps, &F);
|
||||
if (!NewF) {
|
||||
FeatureBitset NewFeatures((CalleeBits & ~TargetFeatures) |
|
||||
CallerBits);
|
||||
const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
|
||||
if (!AllowClone) {
|
||||
// This may set different features on different iteartions if
|
||||
// there is a contradiction in callers' attributes. In this case
|
||||
// we rely on a second pass running on Module, which is allowed
|
||||
// to clone.
|
||||
setFeatures(F, NewFeatures);
|
||||
setFeatures(F, NewProps.Features);
|
||||
setAttributes(F, NewProps.Attributes);
|
||||
NewRoots.insert(&F);
|
||||
Changed = true;
|
||||
break;
|
||||
}
|
||||
|
||||
NewF = cloneWithFeatures(F, NewFeatures);
|
||||
Clones.push_back(Clone(CallerBits, &F, NewF));
|
||||
NewF = cloneWithProperties(F, NewProps);
|
||||
Clones.push_back(Clone(CallerProps, &F, NewF));
|
||||
NewRoots.insert(NewF);
|
||||
}
|
||||
|
||||
@ -267,13 +310,14 @@ bool AMDGPUPropagateAttributes::process() {
|
||||
}
|
||||
|
||||
Function *
|
||||
AMDGPUPropagateAttributes::cloneWithFeatures(Function &F,
|
||||
const FeatureBitset &NewFeatures) {
|
||||
AMDGPUPropagateAttributes::cloneWithProperties(Function &F,
|
||||
const FnProperties &NewProps) {
|
||||
LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');
|
||||
|
||||
ValueToValueMapTy dummy;
|
||||
Function *NewF = CloneFunction(&F, dummy);
|
||||
setFeatures(*NewF, NewFeatures);
|
||||
setFeatures(*NewF, NewProps.Features);
|
||||
setAttributes(*NewF, NewProps.Attributes);
|
||||
NewF->setVisibility(GlobalValue::DefaultVisibility);
|
||||
NewF->setLinkage(GlobalValue::InternalLinkage);
|
||||
|
||||
@ -300,6 +344,18 @@ void AMDGPUPropagateAttributes::setFeatures(Function &F,
|
||||
F.addFnAttr("target-features", NewFeatureStr);
|
||||
}
|
||||
|
||||
void AMDGPUPropagateAttributes::setAttributes(Function &F,
|
||||
const ArrayRef<Optional<Attribute>> NewAttrs) {
|
||||
LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n");
|
||||
for (unsigned I = 0; I < NumAttr; ++I) {
|
||||
F.removeFnAttr(AttributeNames[I]);
|
||||
if (NewAttrs[I]) {
|
||||
LLVM_DEBUG(dbgs() << '\t' << NewAttrs[I]->getAsString() << '\n');
|
||||
F.addFnAttr(*NewAttrs[I]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::string
|
||||
AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
|
||||
{
|
||||
|
@ -55,11 +55,11 @@
|
||||
; OPT-INT: define internal fastcc void @foo3() unnamed_addr #4
|
||||
; OPT-EXT: define internal fastcc void @foo2.3() unnamed_addr #4
|
||||
; OPT-INT: define internal fastcc void @foo2() unnamed_addr #4
|
||||
; OPT: attributes #0 = { {{.*}} "target-features"="+wavefrontsize64" }
|
||||
; OPT: attributes #0 = { {{.*}} "amdgpu-waves-per-eu"="1,1" "target-features"="+wavefrontsize64" }
|
||||
; OPT: attributes #1 = { {{.*}} "target-features"="{{.*}},-wavefrontsize16,-wavefrontsize32,+wavefrontsize64{{.*}}" }
|
||||
; OPT: attributes #2 = { {{.*}} "target-features"="+wavefrontsize32" }
|
||||
; OPT: attributes #2 = { {{.*}} "amdgpu-waves-per-eu"="2,4" "target-features"="+wavefrontsize32" }
|
||||
; OPT: attributes #3 = { {{.*}} "target-features"="+wavefrontsize64" }
|
||||
; OPT: attributes #4 = { {{.*}} "target-features"="{{.*}},-wavefrontsize16,+wavefrontsize32,-wavefrontsize64{{.*}}" }
|
||||
; OPT: attributes #4 = { {{.*}} "amdgpu-waves-per-eu"="2,4" "target-features"="{{.*}},-wavefrontsize16,+wavefrontsize32,-wavefrontsize64{{.*}}" }
|
||||
|
||||
; LLC: foo3:
|
||||
; LLC: sample asm
|
||||
@ -94,7 +94,7 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @foo3() #1 {
|
||||
define void @foo3() #4 {
|
||||
entry:
|
||||
call void asm sideeffect "; sample asm", ""()
|
||||
ret void
|
||||
@ -135,7 +135,8 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind "target-features"="+wavefrontsize32" }
|
||||
attributes #1 = { noinline nounwind "target-features"="+wavefrontsize64" }
|
||||
attributes #0 = { nounwind "target-features"="+wavefrontsize32" "amdgpu-waves-per-eu"="2,4" }
|
||||
attributes #1 = { noinline nounwind "target-features"="+wavefrontsize64" "amdgpu-waves-per-eu"="1,1" }
|
||||
attributes #2 = { nounwind "target-features"="+wavefrontsize64" }
|
||||
attributes #3 = { nounwind "target-features"="+wavefrontsize64" }
|
||||
attributes #4 = { noinline nounwind "target-features"="+wavefrontsize64" "amdgpu-waves-per-eu"="2,4" }
|
||||
|
Loading…
Reference in New Issue
Block a user