1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 02:33:06 +01:00

[AArch64] Update Cortex-A55 SchedModel to improve LDP scheduling

Specifying the latencies of specific LDP variants appears to improve
performance almost universally.

Differential Revision: https://reviews.llvm.org/D105882
This commit is contained in:
Nicholas Guy 2021-07-12 10:36:35 +01:00
parent a9d31ba311
commit 340bf152dd
2 changed files with 88 additions and 80 deletions

View File

@ -90,6 +90,7 @@ def : WriteRes<WriteLDHi, [CortexA55UnitLd]> { let Latency = 5; }
// WriteVLD on the load unit: latency 6, with the unit's ResourceCycles set to 3.
def : WriteRes<WriteVLD, [CortexA55UnitLd]> { let Latency = 6;
let ResourceCycles = [3]; }
// Load-unit write with latency 4 and no ResourceCycles override.
def CortexA55WriteVLD1 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 4; }
// Same resource and latency as CortexA55WriteVLD1, but with SingleIssue set;
// used by the "LDPS?W" InstRW entry.
def CortexA55WriteVLD1SI : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 4; let SingleIssue = 1; }
// Load-unit write with latency 5 and ResourceCycles of 2.
def CortexA55WriteVLD2 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 5;
let ResourceCycles = [2]; }
def CortexA55WriteVLD3 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 6;
@ -105,6 +106,10 @@ def CortexA55WriteVLD7 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 10;
// Load-unit write with latency 11 and ResourceCycles of 8.
def CortexA55WriteVLD8 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 11;
let ResourceCycles = [8]; }
// Writes used as the second entry of the per-variant LDP InstRW definitions.
// LDP1: no processor resources listed, latency 4.
def CortexA55WriteLDP1 : SchedWriteRes<[]> { let Latency = 4; }
// LDP2: one load-unit entry, latency 5.
def CortexA55WriteLDP2 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 5; }
// LDP4: five load-unit entries, latency 6. Paired with CortexA55WriteVLD1 for
// "LDPQ", which matches the 6.00 load-unit cycles the llvm-mca test expects
// for `ldp q`.
def CortexA55WriteLDP4 : SchedWriteRes<[CortexA55UnitLd, CortexA55UnitLd, CortexA55UnitLd, CortexA55UnitLd, CortexA55UnitLd]> { let Latency = 6; }
// Pre/Post Indexing - Performed as part of address generation
// WriteAdr is therefore given no processor resources and a latency of 0.
def : WriteRes<WriteAdr, []> { let Latency = 0; }
@ -227,7 +232,10 @@ def : ReadAdvance<ReadID, 1, [WriteImm,WriteI,
//---
// Miscellaneous
//---
// NOTE(review): this listing is a diff rendered without +/- markers. Going by
// the hunk header (7 old lines, 10 new lines), the catch-all "LDP.*" entry
// below appears to be the removed line and the four entries after it the
// added ones -- confirm against the actual .td file.
def : InstRW<[CortexA55WriteVLD2,CortexA55WriteVLD1], (instregex "LDP.*")>;
// Per-variant LDP entries. The llvm-mca expectations in this change show
// latency / load-unit cycles of 4 / 1.00 for `ldp w` and `ldpsw`,
// 4 / 1.00 for `ldp s`, 5 / 2.00 for `ldp x` and `ldp d`, and 6 / 6.00 for
// `ldp q`.
// W-register and sign-extending-word pairs; uses the SingleIssue write.
def : InstRW<[CortexA55WriteVLD1SI,CortexA55WriteLDP1], (instregex "LDPS?W")>;
// "LDPS" followed by anything other than 'W', which excludes the LDPSW opcodes.
def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP1], (instregex "LDPS[^W]")>;
// X- and D-register pairs.
def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP2], (instregex "LDP(X|D)")>;
// Q-register pairs.
def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP4], (instregex "LDPQ")>;
// COPY is scheduled with the WriteI write.
def : InstRW<[WriteI], (instrs COPY)>;
//---
// Vector Loads - 64-bit per cycle

View File

@ -2409,63 +2409,63 @@ drps
# CHECK-NEXT: 1 4 1.00 * ldr q17, [x23, w9, sxtw]
# CHECK-NEXT: 1 1 1.00 * str q18, [x22, w10, sxtw]
# CHECK-NEXT: 1 4 1.00 * ldr q19, [x21, wzr, sxtw #4]
# CHECK-NEXT: 2 5 3.00 * ldp w3, w5, [sp]
# CHECK-NEXT: 2 4 1.00 * ldp w3, w5, [sp]
# CHECK-NEXT: 1 1 1.00 * stp wzr, w9, [sp, #252]
# CHECK-NEXT: 2 5 3.00 * ldp w2, wzr, [sp, #-256]
# CHECK-NEXT: 2 5 3.00 * ldp w9, w10, [sp, #4]
# CHECK-NEXT: 2 5 3.00 * ldpsw x9, x10, [sp, #4]
# CHECK-NEXT: 2 5 3.00 * ldpsw x9, x10, [x2, #-256]
# CHECK-NEXT: 2 5 3.00 * ldpsw x20, x30, [sp, #252]
# CHECK-NEXT: 2 5 3.00 * ldp x21, x29, [x2, #504]
# CHECK-NEXT: 2 5 3.00 * ldp x22, x23, [x3, #-512]
# CHECK-NEXT: 2 5 3.00 * ldp x24, x25, [x4, #8]
# CHECK-NEXT: 2 5 3.00 * ldp s29, s28, [sp, #252]
# CHECK-NEXT: 2 4 1.00 * ldp w2, wzr, [sp, #-256]
# CHECK-NEXT: 2 4 1.00 * ldp w9, w10, [sp, #4]
# CHECK-NEXT: 2 4 1.00 * ldpsw x9, x10, [sp, #4]
# CHECK-NEXT: 2 4 1.00 * ldpsw x9, x10, [x2, #-256]
# CHECK-NEXT: 2 4 1.00 * ldpsw x20, x30, [sp, #252]
# CHECK-NEXT: 2 5 2.00 * ldp x21, x29, [x2, #504]
# CHECK-NEXT: 2 5 2.00 * ldp x22, x23, [x3, #-512]
# CHECK-NEXT: 2 5 2.00 * ldp x24, x25, [x4, #8]
# CHECK-NEXT: 2 4 1.00 * ldp s29, s28, [sp, #252]
# CHECK-NEXT: 1 1 1.00 * stp s27, s26, [sp, #-256]
# CHECK-NEXT: 2 5 3.00 * ldp s1, s2, [x3, #44]
# CHECK-NEXT: 2 4 1.00 * ldp s1, s2, [x3, #44]
# CHECK-NEXT: 1 1 1.00 * stp d3, d5, [x9, #504]
# CHECK-NEXT: 1 1 1.00 * stp d7, d11, [x10, #-512]
# CHECK-NEXT: 2 5 3.00 * ldp d2, d3, [x30, #-8]
# CHECK-NEXT: 2 5 2.00 * ldp d2, d3, [x30, #-8]
# CHECK-NEXT: 1 1 1.00 * stp q3, q5, [sp]
# CHECK-NEXT: 1 1 1.00 * stp q17, q19, [sp, #1008]
# CHECK-NEXT: 2 5 3.00 * ldp q23, q29, [x1, #-1024]
# CHECK-NEXT: 2 5 3.00 * ldp w3, w5, [sp], #0
# CHECK-NEXT: 2 6 6.00 * ldp q23, q29, [x1, #-1024]
# CHECK-NEXT: 2 4 1.00 * ldp w3, w5, [sp], #0
# CHECK-NEXT: 2 1 1.00 * stp wzr, w9, [sp], #252
# CHECK-NEXT: 2 5 3.00 * ldp w2, wzr, [sp], #-256
# CHECK-NEXT: 2 5 3.00 * ldp w9, w10, [sp], #4
# CHECK-NEXT: 2 5 3.00 * ldpsw x9, x10, [sp], #4
# CHECK-NEXT: 2 5 3.00 * ldpsw x9, x10, [x2], #-256
# CHECK-NEXT: 2 5 3.00 * ldpsw x20, x30, [sp], #252
# CHECK-NEXT: 2 5 3.00 * ldp x21, x29, [x2], #504
# CHECK-NEXT: 2 5 3.00 * ldp x22, x23, [x3], #-512
# CHECK-NEXT: 2 5 3.00 * ldp x24, x25, [x4], #8
# CHECK-NEXT: 2 5 3.00 * ldp s29, s28, [sp], #252
# CHECK-NEXT: 2 4 1.00 * ldp w2, wzr, [sp], #-256
# CHECK-NEXT: 2 4 1.00 * ldp w9, w10, [sp], #4
# CHECK-NEXT: 2 4 1.00 * ldpsw x9, x10, [sp], #4
# CHECK-NEXT: 2 4 1.00 * ldpsw x9, x10, [x2], #-256
# CHECK-NEXT: 2 4 1.00 * ldpsw x20, x30, [sp], #252
# CHECK-NEXT: 2 5 2.00 * ldp x21, x29, [x2], #504
# CHECK-NEXT: 2 5 2.00 * ldp x22, x23, [x3], #-512
# CHECK-NEXT: 2 5 2.00 * ldp x24, x25, [x4], #8
# CHECK-NEXT: 2 4 1.00 * ldp s29, s28, [sp], #252
# CHECK-NEXT: 2 1 1.00 * stp s27, s26, [sp], #-256
# CHECK-NEXT: 2 5 3.00 * ldp s1, s2, [x3], #44
# CHECK-NEXT: 2 4 1.00 * ldp s1, s2, [x3], #44
# CHECK-NEXT: 2 1 1.00 * stp d3, d5, [x9], #504
# CHECK-NEXT: 2 1 1.00 * stp d7, d11, [x10], #-512
# CHECK-NEXT: 2 5 3.00 * ldp d2, d3, [x30], #-8
# CHECK-NEXT: 2 5 2.00 * ldp d2, d3, [x30], #-8
# CHECK-NEXT: 2 1 1.00 * stp q3, q5, [sp], #0
# CHECK-NEXT: 2 1 1.00 * stp q17, q19, [sp], #1008
# CHECK-NEXT: 2 5 3.00 * ldp q23, q29, [x1], #-1024
# CHECK-NEXT: 2 5 3.00 * ldp w3, w5, [sp, #0]!
# CHECK-NEXT: 2 6 6.00 * ldp q23, q29, [x1], #-1024
# CHECK-NEXT: 2 4 1.00 * ldp w3, w5, [sp, #0]!
# CHECK-NEXT: 2 1 1.00 * stp wzr, w9, [sp, #252]!
# CHECK-NEXT: 2 5 3.00 * ldp w2, wzr, [sp, #-256]!
# CHECK-NEXT: 2 5 3.00 * ldp w9, w10, [sp, #4]!
# CHECK-NEXT: 2 5 3.00 * ldpsw x9, x10, [sp, #4]!
# CHECK-NEXT: 2 5 3.00 * ldpsw x9, x10, [x2, #-256]!
# CHECK-NEXT: 2 5 3.00 * ldpsw x20, x30, [sp, #252]!
# CHECK-NEXT: 2 5 3.00 * ldp x21, x29, [x2, #504]!
# CHECK-NEXT: 2 5 3.00 * ldp x22, x23, [x3, #-512]!
# CHECK-NEXT: 2 5 3.00 * ldp x24, x25, [x4, #8]!
# CHECK-NEXT: 2 5 3.00 * ldp s29, s28, [sp, #252]!
# CHECK-NEXT: 2 4 1.00 * ldp w2, wzr, [sp, #-256]!
# CHECK-NEXT: 2 4 1.00 * ldp w9, w10, [sp, #4]!
# CHECK-NEXT: 2 4 1.00 * ldpsw x9, x10, [sp, #4]!
# CHECK-NEXT: 2 4 1.00 * ldpsw x9, x10, [x2, #-256]!
# CHECK-NEXT: 2 4 1.00 * ldpsw x20, x30, [sp, #252]!
# CHECK-NEXT: 2 5 2.00 * ldp x21, x29, [x2, #504]!
# CHECK-NEXT: 2 5 2.00 * ldp x22, x23, [x3, #-512]!
# CHECK-NEXT: 2 5 2.00 * ldp x24, x25, [x4, #8]!
# CHECK-NEXT: 2 4 1.00 * ldp s29, s28, [sp, #252]!
# CHECK-NEXT: 2 1 1.00 * stp s27, s26, [sp, #-256]!
# CHECK-NEXT: 2 5 3.00 * ldp s1, s2, [x3, #44]!
# CHECK-NEXT: 2 4 1.00 * ldp s1, s2, [x3, #44]!
# CHECK-NEXT: 2 1 1.00 * stp d3, d5, [x9, #504]!
# CHECK-NEXT: 2 1 1.00 * stp d7, d11, [x10, #-512]!
# CHECK-NEXT: 2 5 3.00 * ldp d2, d3, [x30, #-8]!
# CHECK-NEXT: 2 5 2.00 * ldp d2, d3, [x30, #-8]!
# CHECK-NEXT: 2 1 1.00 * stp q3, q5, [sp, #0]!
# CHECK-NEXT: 2 1 1.00 * stp q17, q19, [sp, #1008]!
# CHECK-NEXT: 2 5 3.00 * ldp q23, q29, [x1, #-1024]!
# CHECK-NEXT: 2 6 6.00 * ldp q23, q29, [x1, #-1024]!
# CHECK-NEXT: 2 5 2.00 * ldnp w3, w5, [sp]
# CHECK-NEXT: 1 1 1.00 * stnp wzr, w9, [sp, #252]
# CHECK-NEXT: 2 5 2.00 * ldnp w2, wzr, [sp, #-256]
@ -2556,7 +2556,7 @@ drps
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3.0] [3.1] [4] [5.0] [5.1] [6] [7] [8]
# CHECK-NEXT: 232.50 232.50 22.00 32.00 110.50 110.50 87.00 6.00 6.00 361.00 51.00 128.00
# CHECK-NEXT: 232.50 232.50 22.00 32.00 110.50 110.50 87.00 6.00 6.00 310.00 51.00 128.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3.0] [3.1] [4] [5.0] [5.1] [6] [7] [8] Instructions:
@ -3592,63 +3592,63 @@ drps
# CHECK-NEXT: - - - - - - - - - 1.00 - - ldr q17, [x23, w9, sxtw]
# CHECK-NEXT: - - - - - - - - - - - 1.00 str q18, [x22, w10, sxtw]
# CHECK-NEXT: - - - - - - - - - 1.00 - - ldr q19, [x21, wzr, sxtw #4]
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldp w3, w5, [sp]
# CHECK-NEXT: - - - - - - - - - 1.00 - - ldp w3, w5, [sp]
# CHECK-NEXT: - - - - - - - - - - - 1.00 stp wzr, w9, [sp, #252]
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldp w2, wzr, [sp, #-256]
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldp w9, w10, [sp, #4]
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldpsw x9, x10, [sp, #4]
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldpsw x9, x10, [x2, #-256]
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldpsw x20, x30, [sp, #252]
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldp x21, x29, [x2, #504]
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldp x22, x23, [x3, #-512]
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldp x24, x25, [x4, #8]
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldp s29, s28, [sp, #252]
# CHECK-NEXT: - - - - - - - - - 1.00 - - ldp w2, wzr, [sp, #-256]
# CHECK-NEXT: - - - - - - - - - 1.00 - - ldp w9, w10, [sp, #4]
# CHECK-NEXT: - - - - - - - - - 1.00 - - ldpsw x9, x10, [sp, #4]
# CHECK-NEXT: - - - - - - - - - 1.00 - - ldpsw x9, x10, [x2, #-256]
# CHECK-NEXT: - - - - - - - - - 1.00 - - ldpsw x20, x30, [sp, #252]
# CHECK-NEXT: - - - - - - - - - 2.00 - - ldp x21, x29, [x2, #504]
# CHECK-NEXT: - - - - - - - - - 2.00 - - ldp x22, x23, [x3, #-512]
# CHECK-NEXT: - - - - - - - - - 2.00 - - ldp x24, x25, [x4, #8]
# CHECK-NEXT: - - - - - - - - - 1.00 - - ldp s29, s28, [sp, #252]
# CHECK-NEXT: - - - - - - - - - - - 1.00 stp s27, s26, [sp, #-256]
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldp s1, s2, [x3, #44]
# CHECK-NEXT: - - - - - - - - - 1.00 - - ldp s1, s2, [x3, #44]
# CHECK-NEXT: - - - - - - - - - - - 1.00 stp d3, d5, [x9, #504]
# CHECK-NEXT: - - - - - - - - - - - 1.00 stp d7, d11, [x10, #-512]
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldp d2, d3, [x30, #-8]
# CHECK-NEXT: - - - - - - - - - 2.00 - - ldp d2, d3, [x30, #-8]
# CHECK-NEXT: - - - - - - - - - - - 1.00 stp q3, q5, [sp]
# CHECK-NEXT: - - - - - - - - - - - 1.00 stp q17, q19, [sp, #1008]
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldp q23, q29, [x1, #-1024]
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldp w3, w5, [sp], #0
# CHECK-NEXT: - - - - - - - - - 6.00 - - ldp q23, q29, [x1, #-1024]
# CHECK-NEXT: - - - - - - - - - 1.00 - - ldp w3, w5, [sp], #0
# CHECK-NEXT: - - - - - - - - - - - 1.00 stp wzr, w9, [sp], #252
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldp w2, wzr, [sp], #-256
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldp w9, w10, [sp], #4
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldpsw x9, x10, [sp], #4
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldpsw x9, x10, [x2], #-256
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldpsw x20, x30, [sp], #252
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldp x21, x29, [x2], #504
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldp x22, x23, [x3], #-512
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldp x24, x25, [x4], #8
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldp s29, s28, [sp], #252
# CHECK-NEXT: - - - - - - - - - 1.00 - - ldp w2, wzr, [sp], #-256
# CHECK-NEXT: - - - - - - - - - 1.00 - - ldp w9, w10, [sp], #4
# CHECK-NEXT: - - - - - - - - - 1.00 - - ldpsw x9, x10, [sp], #4
# CHECK-NEXT: - - - - - - - - - 1.00 - - ldpsw x9, x10, [x2], #-256
# CHECK-NEXT: - - - - - - - - - 1.00 - - ldpsw x20, x30, [sp], #252
# CHECK-NEXT: - - - - - - - - - 2.00 - - ldp x21, x29, [x2], #504
# CHECK-NEXT: - - - - - - - - - 2.00 - - ldp x22, x23, [x3], #-512
# CHECK-NEXT: - - - - - - - - - 2.00 - - ldp x24, x25, [x4], #8
# CHECK-NEXT: - - - - - - - - - 1.00 - - ldp s29, s28, [sp], #252
# CHECK-NEXT: - - - - - - - - - - - 1.00 stp s27, s26, [sp], #-256
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldp s1, s2, [x3], #44
# CHECK-NEXT: - - - - - - - - - 1.00 - - ldp s1, s2, [x3], #44
# CHECK-NEXT: - - - - - - - - - - - 1.00 stp d3, d5, [x9], #504
# CHECK-NEXT: - - - - - - - - - - - 1.00 stp d7, d11, [x10], #-512
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldp d2, d3, [x30], #-8
# CHECK-NEXT: - - - - - - - - - 2.00 - - ldp d2, d3, [x30], #-8
# CHECK-NEXT: - - - - - - - - - - - 1.00 stp q3, q5, [sp], #0
# CHECK-NEXT: - - - - - - - - - - - 1.00 stp q17, q19, [sp], #1008
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldp q23, q29, [x1], #-1024
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldp w3, w5, [sp, #0]!
# CHECK-NEXT: - - - - - - - - - 6.00 - - ldp q23, q29, [x1], #-1024
# CHECK-NEXT: - - - - - - - - - 1.00 - - ldp w3, w5, [sp, #0]!
# CHECK-NEXT: - - - - - - - - - - - 1.00 stp wzr, w9, [sp, #252]!
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldp w2, wzr, [sp, #-256]!
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldp w9, w10, [sp, #4]!
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldpsw x9, x10, [sp, #4]!
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldpsw x9, x10, [x2, #-256]!
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldpsw x20, x30, [sp, #252]!
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldp x21, x29, [x2, #504]!
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldp x22, x23, [x3, #-512]!
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldp x24, x25, [x4, #8]!
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldp s29, s28, [sp, #252]!
# CHECK-NEXT: - - - - - - - - - 1.00 - - ldp w2, wzr, [sp, #-256]!
# CHECK-NEXT: - - - - - - - - - 1.00 - - ldp w9, w10, [sp, #4]!
# CHECK-NEXT: - - - - - - - - - 1.00 - - ldpsw x9, x10, [sp, #4]!
# CHECK-NEXT: - - - - - - - - - 1.00 - - ldpsw x9, x10, [x2, #-256]!
# CHECK-NEXT: - - - - - - - - - 1.00 - - ldpsw x20, x30, [sp, #252]!
# CHECK-NEXT: - - - - - - - - - 2.00 - - ldp x21, x29, [x2, #504]!
# CHECK-NEXT: - - - - - - - - - 2.00 - - ldp x22, x23, [x3, #-512]!
# CHECK-NEXT: - - - - - - - - - 2.00 - - ldp x24, x25, [x4, #8]!
# CHECK-NEXT: - - - - - - - - - 1.00 - - ldp s29, s28, [sp, #252]!
# CHECK-NEXT: - - - - - - - - - - - 1.00 stp s27, s26, [sp, #-256]!
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldp s1, s2, [x3, #44]!
# CHECK-NEXT: - - - - - - - - - 1.00 - - ldp s1, s2, [x3, #44]!
# CHECK-NEXT: - - - - - - - - - - - 1.00 stp d3, d5, [x9, #504]!
# CHECK-NEXT: - - - - - - - - - - - 1.00 stp d7, d11, [x10, #-512]!
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldp d2, d3, [x30, #-8]!
# CHECK-NEXT: - - - - - - - - - 2.00 - - ldp d2, d3, [x30, #-8]!
# CHECK-NEXT: - - - - - - - - - - - 1.00 stp q3, q5, [sp, #0]!
# CHECK-NEXT: - - - - - - - - - - - 1.00 stp q17, q19, [sp, #1008]!
# CHECK-NEXT: - - - - - - - - - 3.00 - - ldp q23, q29, [x1, #-1024]!
# CHECK-NEXT: - - - - - - - - - 6.00 - - ldp q23, q29, [x1, #-1024]!
# CHECK-NEXT: - - - - - - - - - 2.00 - - ldnp w3, w5, [sp]
# CHECK-NEXT: - - - - - - - - - - - 1.00 stnp wzr, w9, [sp, #252]
# CHECK-NEXT: - - - - - - - - - 2.00 - - ldnp w2, wzr, [sp, #-256]