1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

[X86][BtVer2] Teach how to identify zero-idiom VPERM2F128rr instructions.

This patch adds another variant class to identify zero-idiom VPERM2F128rr
instructions.

On Jaguar, a VPERM2F128 with bits 3 and 7 of the mask set is a zero-idiom.

Differential Revision: https://reviews.llvm.org/D52663

llvm-svn: 343452
This commit is contained in:
Andrea Di Biagio 2018-10-01 10:35:13 +00:00
parent f62f8e4508
commit 96c5b6d4fa
3 changed files with 31 additions and 16 deletions

View File

@ -19,6 +19,13 @@
// different zero-idioms.
def ZeroIdiomPredicate : CheckSameRegOperand<1, 2>;
// A predicate used to identify VPERM that have bits 3 and 7 of their mask set.
// On some processors, these VPERM instructions are zero-idioms.
//
// Both sub-predicates must hold (CheckAll):
//  - ZeroIdiomPredicate: operands 1 and 2 are the same register.
//  - CheckImmOperand<3, 0x88>: operand 3 is the immediate mask; 0x88 has
//    exactly bits 3 and 7 set.
// NOTE(review): CheckImmOperand appears to compare the immediate for equality
// with 0x88, so masks that set bits 3 and 7 together with other bits would
// presumably not match -- confirm against TargetInstrPredicate.td.
def ZeroIdiomVPERMPredicate : CheckAll<[
ZeroIdiomPredicate,
CheckImmOperand<3, 0x88>
]>;
// A predicate used to check if a LEA instruction uses all three source
// operands: base, index, and offset.
def IsThreeOperandsLEAPredicate: CheckAll<[

View File

@ -688,6 +688,12 @@ def : InstRW<[JWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
PCMPGTQrr, VPCMPGTQrr,
PCMPGTWrr, VPCMPGTWrr)>;
// Variant write for VPERM2F128rr. When the instruction satisfies
// ZeroIdiomVPERMPredicate it is scheduled as JWriteZeroIdiomYmm; otherwise
// (NoSchedPred) it uses the regular WriteFShuffle256 write.
def JWriteVPERM2F128 : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomVPERMPredicate>, [JWriteZeroIdiomYmm]>,
SchedVar<NoSchedPred, [WriteFShuffle256]>
]>;
// Bind the variant write to the register-register form of VPERM2F128.
def : InstRW<[JWriteVPERM2F128], (instrs VPERM2F128rr)>;
// This write is used for slow LEA instructions.
def JWrite3OpsLEA : SchedWriteRes<[JALU1, JSAGU]> {
let Latency = 2;
@ -762,7 +768,9 @@ def : IsZeroIdiomFunction<[
// ymm variants.
VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr
], ZeroIdiomPredicate>
], ZeroIdiomPredicate>,
DepBreakingClass<[ VPERM2F128rr ], ZeroIdiomVPERMPredicate>
]>;
def : IsDepBreakingFunction<[

View File

@ -330,12 +330,12 @@ vaddps %ymm1, %ymm1, %ymm0
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 200
# CHECK-NEXT: Total Cycles: 403
# CHECK-NEXT: Total Cycles: 205
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 2
# CHECK-NEXT: uOps Per Cycle: 0.99
# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: uOps Per Cycle: 1.95
# CHECK-NEXT: IPC: 0.98
# CHECK-NEXT: Block RThroughput: 2.0
# CHECK: Instruction Info:
@ -347,7 +347,7 @@ vaddps %ymm1, %ymm1, %ymm0
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 2 1 1.00 vperm2f128 $136, %ymm0, %ymm0, %ymm1
# CHECK-NEXT: 2 1 0.50 vperm2f128 $136, %ymm0, %ymm0, %ymm1
# CHECK-NEXT: 2 3 2.00 vaddps %ymm1, %ymm1, %ymm0
# CHECK: Resources:
@ -368,23 +368,23 @@ vaddps %ymm1, %ymm1, %ymm0
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: - - - 2.00 2.00 2.00 2.00 - - - - - - -
# CHECK-NEXT: - - - 2.00 1.00 2.00 1.00 - - - - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - - 2.00 - 2.00 - - - - - - - vperm2f128 $136, %ymm0, %ymm0, %ymm1
# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - vperm2f128 $136, %ymm0, %ymm0, %ymm1
# CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vaddps %ymm1, %ymm1, %ymm0
# CHECK: Timeline view:
# CHECK-NEXT: 01234
# CHECK-NEXT: 0
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeER . . . vperm2f128 $136, %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [0,1] .DeeeER . . vaddps %ymm1, %ymm1, %ymm0
# CHECK-NEXT: [1,0] . D==eER . . vperm2f128 $136, %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [1,1] . D==eeeER . vaddps %ymm1, %ymm1, %ymm0
# CHECK-NEXT: [2,0] . D====eER . vperm2f128 $136, %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [2,1] . D====eeeER vaddps %ymm1, %ymm1, %ymm0
# CHECK: [0,0] DeER . . vperm2f128 $136, %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [0,1] .DeeeER . vaddps %ymm1, %ymm1, %ymm0
# CHECK-NEXT: [1,0] . DeE-R . vperm2f128 $136, %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [1,1] . DeeeER . vaddps %ymm1, %ymm1, %ymm0
# CHECK-NEXT: [2,0] . DeE-R . vperm2f128 $136, %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [2,1] . DeeeER vaddps %ymm1, %ymm1, %ymm0
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@ -393,5 +393,5 @@ vaddps %ymm1, %ymm1, %ymm0
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 3.0 0.3 0.0 vperm2f128 $136, %ymm0, %ymm0, %ymm1
# CHECK-NEXT: 1. 3 3.0 0.0 0.0 vaddps %ymm1, %ymm1, %ymm0
# CHECK-NEXT: 0. 3 1.0 1.0 0.7 vperm2f128 $136, %ymm0, %ymm0, %ymm1
# CHECK-NEXT: 1. 3 1.0 0.0 0.0 vaddps %ymm1, %ymm1, %ymm0