mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-18 10:32:48 +02:00
[MCA][InstrBuilder] Always check for implicit uses of resource units (PR50725).
When instructions are issued to the underlying pipeline resources, the mca::ResourceManager should also check for the presence of extra uses induced by the explicit consumption of multiple partially overlapping group resources. Fixes PR50725.
This commit is contained in:
parent
12d0e1a8c8
commit
a8b232ce81
@ -359,6 +359,9 @@ struct InstrDesc {
|
||||
// A bitmask of used processor resource units.
|
||||
uint64_t UsedProcResUnits;
|
||||
|
||||
// A bitmask of implicit uses of processor resource units.
|
||||
uint64_t ImplicitlyUsedProcResUnits;
|
||||
|
||||
// A bitmask of used processor resource groups.
|
||||
uint64_t UsedProcResGroups;
|
||||
|
||||
|
@ -114,8 +114,8 @@ ResourceManager::ResourceManager(const MCSchedModel &SM)
|
||||
Resource2Groups(SM.getNumProcResourceKinds() - 1, 0),
|
||||
ProcResID2Mask(SM.getNumProcResourceKinds(), 0),
|
||||
ResIndex2ProcResID(SM.getNumProcResourceKinds() - 1, 0),
|
||||
ProcResUnitMask(0), ReservedResourceGroups(0),
|
||||
AvailableBuffers(~0ULL), ReservedBuffers(0) {
|
||||
ProcResUnitMask(0), ReservedResourceGroups(0), AvailableBuffers(~0ULL),
|
||||
ReservedBuffers(0) {
|
||||
computeProcResourceMasks(SM, ProcResID2Mask);
|
||||
|
||||
// initialize vector ResIndex2ProcResID.
|
||||
@ -288,6 +288,15 @@ uint64_t ResourceManager::checkAvailability(const InstrDesc &Desc) const {
|
||||
BusyResourceMask |= E.first;
|
||||
}
|
||||
|
||||
uint64_t ImplicitUses = Desc.ImplicitlyUsedProcResUnits;
|
||||
while (ImplicitUses) {
|
||||
uint64_t Use = ImplicitUses & -ImplicitUses;
|
||||
ImplicitUses ^= Use;
|
||||
unsigned Index = getResourceStateIndex(Use);
|
||||
if (!Resources[Index]->isReady(/* NumUnits */ 1))
|
||||
BusyResourceMask |= Index;
|
||||
}
|
||||
|
||||
BusyResourceMask &= ProcResUnitMask;
|
||||
if (BusyResourceMask)
|
||||
return BusyResourceMask;
|
||||
|
@ -43,7 +43,7 @@ static void initializeUsedResources(InstrDesc &ID,
|
||||
|
||||
// Populate resources consumed.
|
||||
using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
|
||||
std::vector<ResourcePlusCycles> Worklist;
|
||||
SmallVector<ResourcePlusCycles, 4> Worklist;
|
||||
|
||||
// Track cycles contributed by resources that are in a "Super" relationship.
|
||||
// This is required if we want to correctly match the behavior of method
|
||||
@ -109,6 +109,11 @@ static void initializeUsedResources(InstrDesc &ID,
|
||||
|
||||
uint64_t UsedResourceUnits = 0;
|
||||
uint64_t UsedResourceGroups = 0;
|
||||
auto GroupIt = find_if(Worklist, [](const ResourcePlusCycles &Elt) {
|
||||
return countPopulation(Elt.first) > 1;
|
||||
});
|
||||
unsigned FirstGroupIdx = std::distance(Worklist.begin(), GroupIt);
|
||||
uint64_t ImpliedUsesOfResourceUnits = 0;
|
||||
|
||||
// Remove cycles contributed by smaller resources.
|
||||
for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
|
||||
@ -127,6 +132,15 @@ static void initializeUsedResources(InstrDesc &ID,
|
||||
// Remove the leading 1 from the resource group mask.
|
||||
NormalizedMask ^= PowerOf2Floor(NormalizedMask);
|
||||
UsedResourceGroups |= (A.first ^ NormalizedMask);
|
||||
|
||||
uint64_t AvailableMask = NormalizedMask & ~UsedResourceUnits;
|
||||
if ((NormalizedMask != AvailableMask) &&
|
||||
countPopulation(AvailableMask) == 1) {
|
||||
// At simulation time, this resource group use will decay into a simple
|
||||
// use of the resource unit identified by `AvailableMask`.
|
||||
ImpliedUsesOfResourceUnits |= AvailableMask;
|
||||
UsedResourceUnits |= AvailableMask;
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned J = I + 1; J < E; ++J) {
|
||||
@ -139,6 +153,31 @@ static void initializeUsedResources(InstrDesc &ID,
|
||||
}
|
||||
}
|
||||
|
||||
// Look for implicit uses of processor resource units. These are resource
|
||||
// units which are indirectly consumed by resource groups, and that must
|
||||
// always be available on instruction issue.
|
||||
while (ImpliedUsesOfResourceUnits) {
|
||||
ID.ImplicitlyUsedProcResUnits |= ImpliedUsesOfResourceUnits;
|
||||
ImpliedUsesOfResourceUnits = 0;
|
||||
for (unsigned I = FirstGroupIdx, E = Worklist.size(); I < E; ++I) {
|
||||
ResourcePlusCycles &A = Worklist[I];
|
||||
if (!A.second.size())
|
||||
continue;
|
||||
|
||||
uint64_t NormalizedMask = A.first;
|
||||
assert(countPopulation(NormalizedMask) > 1);
|
||||
// Remove the leading 1 from the resource group mask.
|
||||
NormalizedMask ^= PowerOf2Floor(NormalizedMask);
|
||||
uint64_t AvailableMask = NormalizedMask & ~UsedResourceUnits;
|
||||
if ((NormalizedMask != AvailableMask) &&
|
||||
countPopulation(AvailableMask) != 1)
|
||||
continue;
|
||||
|
||||
UsedResourceUnits |= AvailableMask;
|
||||
ImpliedUsesOfResourceUnits |= AvailableMask;
|
||||
}
|
||||
}
|
||||
|
||||
// A SchedWrite may specify a number of cycles in which a resource group
|
||||
// is reserved. For example (on target x86; cpu Haswell):
|
||||
//
|
||||
@ -198,6 +237,8 @@ static void initializeUsedResources(InstrDesc &ID,
|
||||
BufferIDs ^= Current;
|
||||
}
|
||||
dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
|
||||
dbgs() << "\t\tImplicitly Used Units="
|
||||
<< format_hex(ID.ImplicitlyUsedProcResUnits, 16) << '\n';
|
||||
dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
|
||||
<< '\n';
|
||||
});
|
||||
|
19
test/tools/llvm-mca/X86/SkylakeClient/PR50725.s
Normal file
19
test/tools/llvm-mca/X86/SkylakeClient/PR50725.s
Normal file
@ -0,0 +1,19 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -all-views=false -summary-view -iterations=1 < %s | FileCheck %s
|
||||
|
||||
# Do not crash when running this simulation.
|
||||
# It is not safe to issue FXRSTOR if SKLPort1 is not available.
|
||||
|
||||
bswap %eax
|
||||
bswap %eax
|
||||
fxrstor 64(%rsp)
|
||||
|
||||
# CHECK: Iterations: 1
|
||||
# CHECK-NEXT: Instructions: 3
|
||||
# CHECK-NEXT: Total Cycles: 68
|
||||
# CHECK-NEXT: Total uOps: 92
|
||||
|
||||
# CHECK: Dispatch Width: 6
|
||||
# CHECK-NEXT: uOps Per Cycle: 1.35
|
||||
# CHECK-NEXT: IPC: 0.04
|
||||
# CHECK-NEXT: Block RThroughput: 16.5
|
Loading…
Reference in New Issue
Block a user