1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-18 10:32:48 +02:00

[MCA][InstrBuilder] Always check for implicit uses of resource units (PR50725).

When instructions are issued to the underlying pipeline resources, the
mca::ResourceManager should also check for the presence of extra uses induced by
the explicit consumption of multiple partially overlapping group resources.

Fixes PR50725
This commit is contained in:
Andrea Di Biagio 2021-06-16 14:39:14 +01:00
parent 12d0e1a8c8
commit a8b232ce81
4 changed files with 75 additions and 3 deletions

View File

@ -359,6 +359,9 @@ struct InstrDesc {
// A bitmask of used processor resource units.
uint64_t UsedProcResUnits;
// A bitmask of implicit uses of processor resource units.
uint64_t ImplicitlyUsedProcResUnits;
// A bitmask of used processor resource groups.
uint64_t UsedProcResGroups;

View File

@ -114,8 +114,8 @@ ResourceManager::ResourceManager(const MCSchedModel &SM)
Resource2Groups(SM.getNumProcResourceKinds() - 1, 0),
ProcResID2Mask(SM.getNumProcResourceKinds(), 0),
ResIndex2ProcResID(SM.getNumProcResourceKinds() - 1, 0),
ProcResUnitMask(0), ReservedResourceGroups(0),
AvailableBuffers(~0ULL), ReservedBuffers(0) {
ProcResUnitMask(0), ReservedResourceGroups(0), AvailableBuffers(~0ULL),
ReservedBuffers(0) {
computeProcResourceMasks(SM, ProcResID2Mask);
// initialize vector ResIndex2ProcResID.
@ -288,6 +288,15 @@ uint64_t ResourceManager::checkAvailability(const InstrDesc &Desc) const {
BusyResourceMask |= E.first;
}
uint64_t ImplicitUses = Desc.ImplicitlyUsedProcResUnits;
while (ImplicitUses) {
uint64_t Use = ImplicitUses & -ImplicitUses;
ImplicitUses ^= Use;
unsigned Index = getResourceStateIndex(Use);
if (!Resources[Index]->isReady(/* NumUnits */ 1))
BusyResourceMask |= Index;
}
BusyResourceMask &= ProcResUnitMask;
if (BusyResourceMask)
return BusyResourceMask;

View File

@ -43,7 +43,7 @@ static void initializeUsedResources(InstrDesc &ID,
// Populate resources consumed.
using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
std::vector<ResourcePlusCycles> Worklist;
SmallVector<ResourcePlusCycles, 4> Worklist;
// Track cycles contributed by resources that are in a "Super" relationship.
// This is required if we want to correctly match the behavior of method
@ -109,6 +109,11 @@ static void initializeUsedResources(InstrDesc &ID,
uint64_t UsedResourceUnits = 0;
uint64_t UsedResourceGroups = 0;
auto GroupIt = find_if(Worklist, [](const ResourcePlusCycles &Elt) {
return countPopulation(Elt.first) > 1;
});
unsigned FirstGroupIdx = std::distance(Worklist.begin(), GroupIt);
uint64_t ImpliedUsesOfResourceUnits = 0;
// Remove cycles contributed by smaller resources.
for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
@ -127,6 +132,15 @@ static void initializeUsedResources(InstrDesc &ID,
// Remove the leading 1 from the resource group mask.
NormalizedMask ^= PowerOf2Floor(NormalizedMask);
UsedResourceGroups |= (A.first ^ NormalizedMask);
uint64_t AvailableMask = NormalizedMask & ~UsedResourceUnits;
if ((NormalizedMask != AvailableMask) &&
countPopulation(AvailableMask) == 1) {
// At simulation time, this resource group use will decay into a simple
// use of the resource unit identified by `AvailableMask`.
ImpliedUsesOfResourceUnits |= AvailableMask;
UsedResourceUnits |= AvailableMask;
}
}
for (unsigned J = I + 1; J < E; ++J) {
@ -139,6 +153,31 @@ static void initializeUsedResources(InstrDesc &ID,
}
}
// Look for implicit uses of processor resource units. These are resource
// units that are indirectly consumed by resource groups, and that must
// always be available on instruction issue.
while (ImpliedUsesOfResourceUnits) {
ID.ImplicitlyUsedProcResUnits |= ImpliedUsesOfResourceUnits;
ImpliedUsesOfResourceUnits = 0;
for (unsigned I = FirstGroupIdx, E = Worklist.size(); I < E; ++I) {
ResourcePlusCycles &A = Worklist[I];
if (!A.second.size())
continue;
uint64_t NormalizedMask = A.first;
assert(countPopulation(NormalizedMask) > 1);
// Remove the leading 1 from the resource group mask.
NormalizedMask ^= PowerOf2Floor(NormalizedMask);
uint64_t AvailableMask = NormalizedMask & ~UsedResourceUnits;
if ((NormalizedMask != AvailableMask) &&
countPopulation(AvailableMask) != 1)
continue;
UsedResourceUnits |= AvailableMask;
ImpliedUsesOfResourceUnits |= AvailableMask;
}
}
// A SchedWrite may specify a number of cycles in which a resource group
// is reserved. For example (on target x86; cpu Haswell):
//
@ -198,6 +237,8 @@ static void initializeUsedResources(InstrDesc &ID,
BufferIDs ^= Current;
}
dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
dbgs() << "\t\tImplicitly Used Units="
<< format_hex(ID.ImplicitlyUsedProcResUnits, 16) << '\n';
dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
<< '\n';
});

View File

@ -0,0 +1,19 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -all-views=false -summary-view -iterations=1 < %s | FileCheck %s
# Do not crash when running this simulation.
# It is not safe to issue FXRSTOR if SKLPort1 is not available.
bswap %eax
bswap %eax
fxrstor 64(%rsp)
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 3
# CHECK-NEXT: Total Cycles: 68
# CHECK-NEXT: Total uOps: 92
# CHECK: Dispatch Width: 6
# CHECK-NEXT: uOps Per Cycle: 1.35
# CHECK-NEXT: IPC: 0.04
# CHECK-NEXT: Block RThroughput: 16.5