[llvm-mca] Add support for instructions with a variadic number of operands.
By default, llvm-mca conservatively assumes that a register operand from the variadic sequence is both a register read and a register write. That is because MCInstrDesc doesn't describe extra variadic operands; there is not enough dataflow information to tell which register operands from the variadic sequence are definitions and which are uses.

However, if a variadic instruction is flagged 'mayStore' (but not 'mayLoad'), and it has no 'unmodeledSideEffects', then llvm-mca (very) optimistically assumes that any register operand in the variadic sequence is a register read only. Conversely, if a variadic instruction is marked as 'mayLoad' (but not 'mayStore'), and it has no 'unmodeledSideEffects', then llvm-mca optimistically assumes that any extra register operand is a register definition only.

These assumptions work quite well for the variadic load/store multiple instructions defined by the ARM backend.

llvm-svn: 347522
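A condensed sketch of that heuristic, written against the public MCInstrDesc API (the enum and helper below are illustrative only and are not part of llvm-mca):

#include "llvm/MC/MCInstrDesc.h"

// How register operands in a variadic sequence are classified under the new
// heuristic (hypothetical helper; the names are made up for this note).
enum class VariadicRegAssumption { ReadAndWrite, ReadOnly, WriteOnly };

static VariadicRegAssumption classifyVariadicRegs(const llvm::MCInstrDesc &MCDesc) {
  if (!MCDesc.hasUnmodeledSideEffects()) {
    if (MCDesc.mayStore() && !MCDesc.mayLoad())
      return VariadicRegAssumption::ReadOnly;   // e.g. ARM STM: the list registers are sources
    if (MCDesc.mayLoad() && !MCDesc.mayStore())
      return VariadicRegAssumption::WriteOnly;  // e.g. ARM LDM: the list registers are destinations
  }
  // Conservative default: treat every variadic register operand as both a
  // register read and a register write.
  return VariadicRegAssumption::ReadAndWrite;
}

In the ARM test added below, ldm is mayLoad-only and stm is mayStore-only, so their register lists are modeled as writes and reads respectively.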
commit 5864f5b887 (parent 04925aa9f6)

test/tools/llvm-mca/ARM/memcpy-ldm-stm.s (new file, 65 lines)
@@ -0,0 +1,65 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=armv7-unknown-unknown -mcpu=swift -iterations=300 -timeline -timeline-max-iterations=3 < %s | FileCheck %s

ldm r2!, {r3, r4, r5, r6, r12, lr}
stm r0!, {r3, r4, r5, r6, r12, lr}

# CHECK: Iterations: 300
# CHECK-NEXT: Instructions: 600
# CHECK-NEXT: Total Cycles: 1295
# CHECK-NEXT: Total uOps: 2400

# CHECK: Dispatch Width: 3
# CHECK-NEXT: uOps Per Cycle: 1.85
# CHECK-NEXT: IPC: 0.46
# CHECK-NEXT: Block RThroughput: 4.0

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 3 18 2.00 * ldm r2!, {r3, r4, r5, r6, r12, lr}
# CHECK-NEXT: 5 1 2.00 * stm r0!, {r3, r4, r5, r6, r12, lr}

# CHECK: Resources:
# CHECK-NEXT: [0] - SwiftUnitDiv
# CHECK-NEXT: [1] - SwiftUnitP0
# CHECK-NEXT: [2] - SwiftUnitP1
# CHECK-NEXT: [3] - SwiftUnitP2
# CHECK-NEXT: [4.0] - SwiftUnitP01
# CHECK-NEXT: [4.1] - SwiftUnitP01

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4.0] [4.1]
# CHECK-NEXT: - - - 4.00 2.46 2.54

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4.0] [4.1] Instructions:
# CHECK-NEXT: - - - 2.00 1.09 0.91 ldm r2!, {r3, r4, r5, r6, r12, lr}
# CHECK-NEXT: - - - 2.00 1.37 1.63 stm r0!, {r3, r4, r5, r6, r12, lr}

# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
# CHECK-NEXT: Index 0123456789 012345678

# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeER . . ldm r2!, {r3, r4, r5, r6, r12, lr}
# CHECK-NEXT: [0,1] .D=================eER . . stm r0!, {r3, r4, r5, r6, r12, lr}
# CHECK-NEXT: [1,0] . DeeeeeeeeeeeeeeeeeeER . . ldm r2!, {r3, r4, r5, r6, r12, lr}
# CHECK-NEXT: [1,1] . D=================eER. . stm r0!, {r3, r4, r5, r6, r12, lr}
# CHECK-NEXT: [2,0] . .DeeeeeeeeeeeeeeeeeeER . ldm r2!, {r3, r4, r5, r6, r12, lr}
# CHECK-NEXT: [2,1] . . D==================eER stm r0!, {r3, r4, r5, r6, r12, lr}

# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 1.0 1.0 0.0 ldm r2!, {r3, r4, r5, r6, r12, lr}
# CHECK-NEXT: 1. 3 18.3 0.3 0.0 stm r0!, {r3, r4, r5, r6, r12, lr}
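A quick sanity check of the summary numbers in the test above (simple arithmetic on the reported values):

  Instructions   = 300 iterations x 2 instructions         = 600
  Total uOps     = 300 x (3 uOps for ldm + 5 uOps for stm) = 2400
  uOps Per Cycle = 2400 uOps / 1295 cycles                 ≈ 1.85
  IPC            = 600 instructions / 1295 cycles          ≈ 0.46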
@@ -190,14 +190,6 @@ static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
}

static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
  // Variadic opcodes are not correctly supported.
  if (MCDesc.isVariadic()) {
    if (MCI.getNumOperands() - MCDesc.getNumOperands()) {
      return make_error<InstructionError<MCInst>>(
          "Don't know how to process this variadic opcode.", MCI);
    }
  }

  // Count register definitions, and skip non register operands in the process.
  unsigned I, E;
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
@@ -281,7 +273,8 @@ void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
  if (MCDesc.hasOptionalDef())
    TotalDefs++;

  ID.Writes.resize(TotalDefs);
  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  ID.Writes.resize(TotalDefs + NumVariadicOps);
  // Iterate over the operands list, and skip non-register operands.
  // The first NumExplicitDefs register operands are expected to be register
  // definitions.
@@ -358,6 +351,41 @@ void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (!NumVariadicOps)
    return;

  // FIXME: if an instruction opcode is flagged 'mayStore', and it has no
  // 'unmodeledSideEffects', then this logic optimistically assumes that any
  // extra register operand in the variadic sequence is not a register
  // definition.
  //
  // Otherwise, we conservatively assume that any register operand from the
  // variadic sequence is both a register read and a register write.
  bool AssumeUsesOnly = MCDesc.mayStore() && !MCDesc.mayLoad() &&
                        !MCDesc.hasUnmodeledSideEffects();
  CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = OpIndex;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = false;
    ++CurrentDef;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  ID.Writes.resize(CurrentDef);
}

void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
@@ -368,14 +396,21 @@ void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
  // Remove the optional definition.
  if (MCDesc.hasOptionalDef())
    --NumExplicitUses;
  unsigned TotalUses = NumExplicitUses + NumImplicitUses;

  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
  ID.Reads.resize(TotalUses);
  for (unsigned I = 0; I < NumExplicitUses; ++I) {
    ReadDescriptor &Read = ID.Reads[I];
    Read.OpIndex = MCDesc.getNumDefs() + I;
  unsigned CurrentUse = 0;
  for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses;
       ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }
@@ -383,7 +418,7 @@ void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
  // For the purpose of ReadAdvance, implicit uses come directly after explicit
  // uses. The "UseIndex" must be updated according to that implicit layout.
  for (unsigned I = 0; I < NumImplicitUses; ++I) {
    ReadDescriptor &Read = ID.Reads[NumExplicitUses + I];
    ReadDescriptor &Read = ID.Reads[CurrentUse + I];
    Read.OpIndex = ~I;
    Read.UseIndex = NumExplicitUses + I;
    Read.RegisterID = MCDesc.getImplicitUses()[I];
@@ -392,6 +427,32 @@ void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
                      << ", UseIndex=" << Read.UseIndex << ", RegisterID="
                      << MRI.getName(Read.RegisterID) << '\n');
  }

  CurrentUse += NumImplicitUses;

  // FIXME: If an instruction opcode is marked as 'mayLoad', and it has no
  // 'unmodeledSideEffects', then this logic optimistically assumes that any
  // extra register operand in the variadic sequence is a register definition
  // only, and not a register use.

  bool AssumeDefsOnly = !MCDesc.mayStore() && MCDesc.mayLoad() &&
                        !MCDesc.hasUnmodeledSideEffects();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = NumExplicitUses + NumImplicitUses + I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  ID.Reads.resize(CurrentUse);
}

Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
@@ -431,10 +492,11 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI) {

  // Then obtain the scheduling class information from the instruction.
  unsigned SchedClassID = MCDesc.getSchedClass();
  unsigned CPUID = SM.getProcessorID();
  bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();

  // Try to solve variant scheduling classes.
  if (SchedClassID) {
  if (IsVariant) {
    unsigned CPUID = SM.getProcessorID();
    while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
      SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, CPUID);
@@ -493,7 +555,8 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI) {

  // Now add the new descriptor.
  SchedClassID = MCDesc.getSchedClass();
  if (!SM.getSchedClassDesc(SchedClassID)->isVariant()) {
  bool IsVariadic = MCDesc.isVariadic();
  if (!IsVariadic && !IsVariant) {
    Descriptors[MCI.getOpcode()] = std::move(ID);
    return *Descriptors[MCI.getOpcode()];
  }
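Both populateWrites and populateReads in the diff above use the same sizing pattern for the variadic operands: the descriptor list is grown by NumVariadicOps up front and trimmed back once the register operands have actually been collected. A self-contained sketch of that pattern (the helper below is hypothetical and collects plain operand indices instead of Write/ReadDescriptor entries):

#include <vector>
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"

// Collect the operand indices of register operands in the variadic sequence,
// over-allocating first and trimming afterwards (hypothetical helper that
// mirrors the pattern used by populateWrites/populateReads).
static void collectVariadicRegOperands(const llvm::MCInst &MCI,
                                       const llvm::MCInstrDesc &MCDesc,
                                       std::vector<unsigned> &OpIndices) {
  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  unsigned Current = OpIndices.size();
  // Worst case: every extra operand is a register.
  OpIndices.resize(Current + NumVariadicOps);
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands(); I < NumVariadicOps;
       ++I, ++OpIndex) {
    const llvm::MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue; // skip immediates and other non-register operands
    OpIndices[Current++] = OpIndex;
  }
  // Trim back to the number of register operands actually found.
  OpIndices.resize(Current);
}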