1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

[X86][SchedModel] Add missing scheduling model for SSE related instructions.

The patch defines new or refines existing generic scheduling classes to match
the behavior of the SSE instructions.
It also maps those scheduling classes on the related SSE instructions.

<rdar://problem/15607571>

llvm-svn: 202065
This commit is contained in:
Quentin Colombet 2014-02-24 19:33:51 +00:00
parent 2ff7280e46
commit 282bf4e578
4 changed files with 752 additions and 277 deletions

File diff suppressed because it is too large Load Diff

View File

@ -50,6 +50,7 @@ def HWPort237 : ProcResGroup<[HWPort2, HWPort3, HWPort7]>;
// Processor resource groups: each def below bundles the listed HWPort
// resources under a single group name that write definitions can reference.
def HWPort05 : ProcResGroup<[HWPort0, HWPort5]>;
def HWPort06 : ProcResGroup<[HWPort0, HWPort6]>;
def HWPort15 : ProcResGroup<[HWPort1, HWPort5]>;
// HWPort16 is referenced by the WritePCmpEStr* write definitions of this model.
def HWPort16 : ProcResGroup<[HWPort1, HWPort6]>;
def HWPort015 : ProcResGroup<[HWPort0, HWPort1, HWPort5]>;
def HWPort0156: ProcResGroup<[HWPort0, HWPort1, HWPort5, HWPort6]>;
@ -125,6 +126,18 @@ defm : HWWriteResPair<WriteFSqrt, HWPort0, 15>;
// Conversions, floating point shuffles and floating point blends.
// NOTE(review): HWWriteResPair is declared outside this excerpt; its arguments
// are presumably (scheduling write, resource, latency) -- confirm against the
// multiclass definition.
defm : HWWriteResPair<WriteCvtF2I,      HWPort1,   3>;
defm : HWWriteResPair<WriteCvtI2F,      HWPort1,   4>;
defm : HWWriteResPair<WriteCvtF2F,      HWPort1,   3>;
defm : HWWriteResPair<WriteFShuffle,    HWPort5,   1>;
defm : HWWriteResPair<WriteFBlend,      HWPort015, 1>;
defm : HWWriteResPair<WriteFShuffle256, HWPort5,   3>;

// Floating point variable blends are spelled out by hand instead of going
// through HWWriteResPair because they set ResourceCycles explicitly.
// The ResourceCycles entries line up positionally with the resource list.
def : WriteRes<WriteFVarBlend, [HWPort5]> {
  let Latency = 2;
  let ResourceCycles = [2];
}

// Same as above with HWPort23 added to the resource list.
def : WriteRes<WriteFVarBlendLd, [HWPort5, HWPort23]> {
  let Latency = 6;
  let ResourceCycles = [2, 1];
}

// Vector integer operations.
defm : HWWriteResPair<WriteVecShift, HWPort0, 1>;
@ -132,7 +145,117 @@ defm : HWWriteResPair<WriteVecLogic, HWPort015, 1>;
// Simple vector integer writes that share the HWWriteResPair shape.
defm : HWWriteResPair<WriteVecALU,     HWPort15, 1>;
defm : HWWriteResPair<WriteVecIMul,    HWPort0,  5>;
defm : HWWriteResPair<WriteShuffle,    HWPort5,  1>;
defm : HWWriteResPair<WriteBlend,      HWPort15, 1>;
defm : HWWriteResPair<WriteShuffle256, HWPort5,  3>;

// Variable blends: HWPort5 only, with an explicit ResourceCycles entry.
def : WriteRes<WriteVarBlend, [HWPort5]> {
  let Latency = 2;
  let ResourceCycles = [2];
}
def : WriteRes<WriteVarBlendLd, [HWPort5, HWPort23]> {
  let Latency = 6;
  let ResourceCycles = [2, 1];
}

// Variable vector shifts: HWPort0 and HWPort5.
def : WriteRes<WriteVarVecShift, [HWPort0, HWPort5]> {
  let Latency = 2;
  let ResourceCycles = [2, 1];
}
def : WriteRes<WriteVarVecShiftLd, [HWPort0, HWPort5, HWPort23]> {
  let Latency = 6;
  let ResourceCycles = [2, 1, 1];
}

// MPSAD: HWPort0 and HWPort5.
def : WriteRes<WriteMPSAD, [HWPort0, HWPort5]> {
  let Latency = 6;
  let ResourceCycles = [1, 2];
}
// NOTE(review): unlike the other *Ld writes in this group, which use a higher
// latency than their register form (2 -> 6), WriteMPSADLd keeps Latency = 6.
// The resource list also puts HWPort23 first here. Confirm both are intended.
def : WriteRes<WriteMPSADLd, [HWPort23, HWPort0, HWPort5]> {
  let Latency = 6;
  let ResourceCycles = [1, 1, 2];
}
// String instructions.
// Each write below comes with an *Ld counterpart that additionally lists
// HWPort23; the ResourceCycles entries line up positionally with the
// resource list.

// Packed Compare Implicit Length Strings, Return Mask
def : WriteRes<WritePCmpIStrM, [HWPort0]> {
  let Latency = 10;
  let ResourceCycles = [3];
}
def : WriteRes<WritePCmpIStrMLd, [HWPort0, HWPort23]> {
  let Latency = 10;
  let ResourceCycles = [3, 1];
}

// Packed Compare Explicit Length Strings, Return Mask
// NOTE(review): the register form lists HWPort0 and HWPort5 separately
// (3 + 4 cycles) while the Ld form uses the HWPort05 group (6 cycles).
// Confirm this asymmetry is intended.
def : WriteRes<WritePCmpEStrM, [HWPort0, HWPort16, HWPort5]> {
  let Latency = 10;
  let ResourceCycles = [3, 2, 4];
}
def : WriteRes<WritePCmpEStrMLd, [HWPort05, HWPort16, HWPort23]> {
  let Latency = 10;
  let ResourceCycles = [6, 2, 1];
}

// Packed Compare Implicit Length Strings, Return Index
def : WriteRes<WritePCmpIStrI, [HWPort0]> {
  let Latency = 11;
  let ResourceCycles = [3];
}
def : WriteRes<WritePCmpIStrILd, [HWPort0, HWPort23]> {
  let Latency = 11;
  let ResourceCycles = [3, 1];
}

// Packed Compare Explicit Length Strings, Return Index
// NOTE(review): here the register form uses the HWPort05 group (6 cycles)
// while the Ld form lists HWPort0 and HWPort5 separately (3 + 2 cycles),
// the reverse of the Return Mask pair above. Confirm this is intended.
def : WriteRes<WritePCmpEStrI, [HWPort05, HWPort16]> {
  let Latency = 11;
  let ResourceCycles = [6, 2];
}
def : WriteRes<WritePCmpEStrILd, [HWPort0, HWPort16, HWPort5, HWPort23]> {
  let Latency = 11;
  let ResourceCycles = [3, 2, 2, 1];
}
// AES Instructions.
// Every write is followed by its *Ld counterpart, which adds HWPort23 to the
// resource list and keeps the same latency.

// Decryption / encryption rounds.
def : WriteRes<WriteAESDecEnc, [HWPort5]> {
  let Latency = 7;
  let ResourceCycles = [1];
}
def : WriteRes<WriteAESDecEncLd, [HWPort5, HWPort23]> {
  let Latency = 7;
  let ResourceCycles = [1, 1];
}

// WriteAESIMC.
def : WriteRes<WriteAESIMC, [HWPort5]> {
  let Latency = 14;
  let ResourceCycles = [2];
}
def : WriteRes<WriteAESIMCLd, [HWPort5, HWPort23]> {
  let Latency = 14;
  let ResourceCycles = [2, 1];
}

// WriteAESKeyGen.
// NOTE(review): the HWPort5 cycle count drops from 8 to 7 in the Ld form,
// with 1 cycle on HWPort23 -- confirm the register/load split is intended.
def : WriteRes<WriteAESKeyGen, [HWPort0, HWPort5]> {
  let Latency = 10;
  let ResourceCycles = [2, 8];
}
def : WriteRes<WriteAESKeyGenLd, [HWPort0, HWPort5, HWPort23]> {
  let Latency = 10;
  let ResourceCycles = [2, 7, 1];
}

// Carry-less multiplication instructions.
def : WriteRes<WriteCLMul, [HWPort0, HWPort5]> {
  let Latency = 7;
  let ResourceCycles = [2, 1];
}
def : WriteRes<WriteCLMulLd, [HWPort0, HWPort5, HWPort23]> {
  let Latency = 7;
  let ResourceCycles = [2, 1, 1];
}
// Catch-all writes: WriteSystem and WriteMicrocoded both get a flat latency of
// 100 on the HWPort0156 group.
def : WriteRes<WriteSystem, [HWPort0156]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [HWPort0156]> { let Latency = 100; }
// WriteFence lists HWPort23 and HWPort4 and overrides no field here, so the
// defaults declared by WriteRes apply.
def : WriteRes<WriteFence, [HWPort23, HWPort4]>;
// WriteNop is mapped to an empty resource list.
def : WriteRes<WriteNop, []>;
} // SchedModel

View File

@ -118,6 +118,16 @@ defm : SBWriteResPair<WriteFSqrt, SBPort0, 15>;
// Conversions, floating point shuffles and floating point blends.
// NOTE(review): SBWriteResPair is declared outside this excerpt; its arguments
// are presumably (scheduling write, resource, latency) -- confirm against the
// multiclass definition.
defm : SBWriteResPair<WriteCvtF2I,   SBPort1,  3>;
defm : SBWriteResPair<WriteCvtI2F,   SBPort1,  4>;
defm : SBWriteResPair<WriteCvtF2F,   SBPort1,  3>;
defm : SBWriteResPair<WriteFShuffle, SBPort5,  1>;
defm : SBWriteResPair<WriteFBlend,   SBPort05, 1>;

// Floating point variable blends: one cycle on each of SBPort0 and SBPort5.
// The ResourceCycles entries line up positionally with the resource list.
def : WriteRes<WriteFVarBlend, [SBPort0, SBPort5]> {
  let Latency = 2;
  let ResourceCycles = [1, 1];
}

// Same as above with SBPort23 added to the resource list.
def : WriteRes<WriteFVarBlendLd, [SBPort0, SBPort5, SBPort23]> {
  let Latency = 6;
  let ResourceCycles = [1, 1, 1];
}

// Vector integer operations.
defm : SBWriteResPair<WriteVecShift, SBPort05, 1>;
@ -125,7 +135,112 @@ defm : SBWriteResPair<WriteVecLogic, SBPort015, 1>;
// Simple vector integer writes that share the SBWriteResPair shape.
defm : SBWriteResPair<WriteVecALU,  SBPort15, 1>;
defm : SBWriteResPair<WriteVecIMul, SBPort0,  5>;
defm : SBWriteResPair<WriteShuffle, SBPort15, 1>;
defm : SBWriteResPair<WriteBlend,   SBPort15, 1>;

// Variable blends: one cycle on each of SBPort1 and SBPort5.
def : WriteRes<WriteVarBlend, [SBPort1, SBPort5]> {
  let Latency = 2;
  let ResourceCycles = [1, 1];
}
def : WriteRes<WriteVarBlendLd, [SBPort1, SBPort5, SBPort23]> {
  let Latency = 6;
  let ResourceCycles = [1, 1, 1];
}

// MPSAD: one cycle on each of SBPort0, SBPort1 and SBPort5.
def : WriteRes<WriteMPSAD, [SBPort0, SBPort1, SBPort5]> {
  let Latency = 6;
  let ResourceCycles = [1, 1, 1];
}
// NOTE(review): WriteMPSADLd keeps the register form's latency (6), whereas
// WriteVarBlendLd above uses a higher latency than WriteVarBlend (2 -> 6).
// Confirm this is intended.
def : WriteRes<WriteMPSADLd, [SBPort0, SBPort1, SBPort5, SBPort23]> {
  let Latency = 6;
  let ResourceCycles = [1, 1, 1, 1];
}
// String instructions.
// All writes use the SBPort015 group; each *Ld counterpart additionally lists
// SBPort23. The ResourceCycles entries line up positionally with the
// resource list.

// Packed Compare Implicit Length Strings, Return Mask
def : WriteRes<WritePCmpIStrM, [SBPort015]> {
  let Latency = 11;
  let ResourceCycles = [3];
}
def : WriteRes<WritePCmpIStrMLd, [SBPort015, SBPort23]> {
  let Latency = 11;
  let ResourceCycles = [3, 1];
}

// Packed Compare Explicit Length Strings, Return Mask
def : WriteRes<WritePCmpEStrM, [SBPort015]> {
  let Latency = 11;
  let ResourceCycles = [8];
}
def : WriteRes<WritePCmpEStrMLd, [SBPort015, SBPort23]> {
  let Latency = 11;
  let ResourceCycles = [7, 1];
}

// Packed Compare Implicit Length Strings, Return Index
// NOTE(review): the Return Index writes use latencies of 3 and 4 while the
// Return Mask writes above use 11 -- confirm these values against the
// reference instruction tables.
def : WriteRes<WritePCmpIStrI, [SBPort015]> {
  let Latency = 3;
  let ResourceCycles = [3];
}
def : WriteRes<WritePCmpIStrILd, [SBPort015, SBPort23]> {
  let Latency = 3;
  let ResourceCycles = [3, 1];
}

// Packed Compare Explicit Length Strings, Return Index
def : WriteRes<WritePCmpEStrI, [SBPort015]> {
  let Latency = 4;
  let ResourceCycles = [8];
}
def : WriteRes<WritePCmpEStrILd, [SBPort015, SBPort23]> {
  let Latency = 4;
  let ResourceCycles = [7, 1];
}
// AES Instructions.
// All writes use the SBPort015 group with a latency of 8; each *Ld
// counterpart additionally lists SBPort23.

// Decryption / encryption rounds.
def : WriteRes<WriteAESDecEnc, [SBPort015]> {
  let Latency = 8;
  let ResourceCycles = [2];
}
def : WriteRes<WriteAESDecEncLd, [SBPort015, SBPort23]> {
  let Latency = 8;
  let ResourceCycles = [2, 1];
}

// WriteAESIMC.
def : WriteRes<WriteAESIMC, [SBPort015]> {
  let Latency = 8;
  let ResourceCycles = [2];
}
def : WriteRes<WriteAESIMCLd, [SBPort015, SBPort23]> {
  let Latency = 8;
  let ResourceCycles = [2, 1];
}

// WriteAESKeyGen.
def : WriteRes<WriteAESKeyGen, [SBPort015]> {
  let Latency = 8;
  let ResourceCycles = [11];
}
def : WriteRes<WriteAESKeyGenLd, [SBPort015, SBPort23]> {
  let Latency = 8;
  let ResourceCycles = [10, 1];
}

// Carry-less multiplication instructions.
def : WriteRes<WriteCLMul, [SBPort015]> {
  let Latency = 14;
  let ResourceCycles = [18];
}
def : WriteRes<WriteCLMulLd, [SBPort015, SBPort23]> {
  let Latency = 14;
  let ResourceCycles = [17, 1];
}
// Catch-all writes: WriteSystem and WriteMicrocoded both get a flat latency of
// 100 on the SBPort015 group.
def : WriteRes<WriteSystem, [SBPort015]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [SBPort015]> { let Latency = 100; }
// WriteFence lists SBPort23 and SBPort4 and overrides no field here, so the
// defaults declared by WriteRes apply.
def : WriteRes<WriteFence, [SBPort23, SBPort4]>;
// WriteNop is mapped to an empty resource list.
def : WriteRes<WriteNop, []>;
// AVX2 is not supported on that architecture, but we should define the basic
// scheduling resources anyway.
// The three writes below are therefore placeholders: all on SBPort0 with the
// same trailing argument of 1.
defm : SBWriteResPair<WriteFShuffle256, SBPort0, 1>;
defm : SBWriteResPair<WriteShuffle256, SBPort0, 1>;
defm : SBWriteResPair<WriteVarVecShift, SBPort0, 1>;
} // SchedModel

View File

@ -69,6 +69,9 @@ defm WriteFDiv : X86SchedWritePair; // Floating point division.
// Floating point scheduling writes. Each defm instantiates X86SchedWritePair
// for the given name; the per-CPU models reference both the plain name and a
// name with an "Ld" suffix (e.g. WriteFVarBlend / WriteFVarBlendLd).
defm WriteFSqrt : X86SchedWritePair; // Floating point square root.
defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal.
defm WriteFMA : X86SchedWritePair; // Fused Multiply Add.
defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles.
defm WriteFBlend : X86SchedWritePair; // Floating point vector blends.
defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends.
// FMA Scheduling helper class.
// Carries a single field, Sched, which defaults to WriteFAdd.
class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
@ -77,23 +80,55 @@ class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
// Vector integer scheduling writes. Each defm instantiates X86SchedWritePair
// for the given name; the per-CPU models reference both the plain name and a
// name with an "Ld" suffix (e.g. WriteVarBlend / WriteVarBlendLd).
defm WriteVecALU   : X86SchedWritePair; // Vector integer ALU op, no logicals.
defm WriteVecShift : X86SchedWritePair; // Vector integer shifts.
defm WriteVecIMul  : X86SchedWritePair; // Vector integer multiply.
// Shuffles and blends are separate writes. WriteShuffle must be declared
// exactly once; the former "Vector shuffles and blends" declaration that
// followed WriteVecLogic duplicated this name and has been dropped.
defm WriteShuffle  : X86SchedWritePair; // Vector shuffles.
defm WriteBlend    : X86SchedWritePair; // Vector blends.
defm WriteVarBlend : X86SchedWritePair; // Vector variable blends.
defm WriteMPSAD    : X86SchedWritePair; // Vector MPSAD.

// Vector bitwise operations.
// These are often used on both floating point and integer vectors.
defm WriteVecLogic : X86SchedWritePair; // Vector and/or/xor.

// Conversion between integer and float.
defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer.
defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float.
defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion.

// String instructions.
// Packed Compare Implicit Length Strings, Return Mask
defm WritePCmpIStrM : X86SchedWritePair;
// Packed Compare Explicit Length Strings, Return Mask
defm WritePCmpEStrM : X86SchedWritePair;
// Packed Compare Implicit Length Strings, Return Index
defm WritePCmpIStrI : X86SchedWritePair;
// Packed Compare Explicit Length Strings, Return Index
defm WritePCmpEStrI : X86SchedWritePair;

// AES instructions.
defm WriteAESDecEnc : X86SchedWritePair; // Decryption, encryption.
defm WriteAESIMC    : X86SchedWritePair; // InvMixColumn.
defm WriteAESKeyGen : X86SchedWritePair; // Key Generation.

// Carry-less multiplication instructions.
defm WriteCLMul : X86SchedWritePair;

// Catch-all for expensive system instructions.
def WriteSystem : SchedWrite;

// AVX2.
defm WriteFShuffle256 : X86SchedWritePair; // Fp 256-bit width vector shuffles.
defm WriteShuffle256  : X86SchedWritePair; // 256-bit width vector shuffles.
defm WriteVarVecShift : X86SchedWritePair; // Variable vector shifts.

// Old microcoded instructions that nobody uses.
def WriteMicrocoded : SchedWrite;

// Fence instructions.
def WriteFence : SchedWrite;

// Nop, not very useful except that it provides a model for nops!
def WriteNop : SchedWrite;
//===----------------------------------------------------------------------===//
// Instruction Itinerary classes used for X86
def IIC_ALU_MEM : InstrItinClass;