mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[X86][BtVer2] Teach how to identify zero-idiom VPERM2F128rr instructions.
This patch adds another variant class to identify zero-idiom VPERM2F128rr instructions. On Jaguar, a VPERM with bits 3 and 7 of the mask set is a zero-idiom. Differential Revision: https://reviews.llvm.org/D52663 llvm-svn: 343452
This commit is contained in:
parent
f62f8e4508
commit
96c5b6d4fa
@ -19,6 +19,13 @@
|
||||
// different zero-idioms.
|
||||
def ZeroIdiomPredicate : CheckSameRegOperand<1, 2>;
|
||||
|
||||
// A predicate used to identify VPERM that have bits 3 and 7 of their mask set.
|
||||
// On some processors, these VPERM instructions are zero-idioms.
|
||||
def ZeroIdiomVPERMPredicate : CheckAll<[
|
||||
ZeroIdiomPredicate,
|
||||
CheckImmOperand<3, 0x88>
|
||||
]>;
|
||||
|
||||
// A predicate used to check if a LEA instruction uses all three source
|
||||
// operands: base, index, and offset.
|
||||
def IsThreeOperandsLEAPredicate: CheckAll<[
|
||||
|
@ -688,6 +688,12 @@ def : InstRW<[JWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
|
||||
PCMPGTQrr, VPCMPGTQrr,
|
||||
PCMPGTWrr, VPCMPGTWrr)>;
|
||||
|
||||
def JWriteVPERM2F128 : SchedWriteVariant<[
|
||||
SchedVar<MCSchedPredicate<ZeroIdiomVPERMPredicate>, [JWriteZeroIdiomYmm]>,
|
||||
SchedVar<NoSchedPred, [WriteFShuffle256]>
|
||||
]>;
|
||||
def : InstRW<[JWriteVPERM2F128], (instrs VPERM2F128rr)>;
|
||||
|
||||
// This write is used for slow LEA instructions.
|
||||
def JWrite3OpsLEA : SchedWriteRes<[JALU1, JSAGU]> {
|
||||
let Latency = 2;
|
||||
@ -762,7 +768,9 @@ def : IsZeroIdiomFunction<[
|
||||
|
||||
// ymm variants.
|
||||
VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr
|
||||
], ZeroIdiomPredicate>
|
||||
], ZeroIdiomPredicate>,
|
||||
|
||||
DepBreakingClass<[ VPERM2F128rr ], ZeroIdiomVPERMPredicate>
|
||||
]>;
|
||||
|
||||
def : IsDepBreakingFunction<[
|
||||
|
@ -330,12 +330,12 @@ vaddps %ymm1, %ymm1, %ymm0
|
||||
|
||||
# CHECK: Iterations: 100
|
||||
# CHECK-NEXT: Instructions: 200
|
||||
# CHECK-NEXT: Total Cycles: 403
|
||||
# CHECK-NEXT: Total Cycles: 205
|
||||
# CHECK-NEXT: Total uOps: 400
|
||||
|
||||
# CHECK: Dispatch Width: 2
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.99
|
||||
# CHECK-NEXT: IPC: 0.50
|
||||
# CHECK-NEXT: uOps Per Cycle: 1.95
|
||||
# CHECK-NEXT: IPC: 0.98
|
||||
# CHECK-NEXT: Block RThroughput: 2.0
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
@ -347,7 +347,7 @@ vaddps %ymm1, %ymm1, %ymm0
|
||||
# CHECK-NEXT: [6]: HasSideEffects (U)
|
||||
|
||||
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# CHECK-NEXT: 2 1 1.00 vperm2f128 $136, %ymm0, %ymm0, %ymm1
|
||||
# CHECK-NEXT: 2 1 0.50 vperm2f128 $136, %ymm0, %ymm0, %ymm1
|
||||
# CHECK-NEXT: 2 3 2.00 vaddps %ymm1, %ymm1, %ymm0
|
||||
|
||||
# CHECK: Resources:
|
||||
@ -368,23 +368,23 @@ vaddps %ymm1, %ymm1, %ymm0
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: - - - 2.00 2.00 2.00 2.00 - - - - - - -
|
||||
# CHECK-NEXT: - - - 2.00 1.00 2.00 1.00 - - - - - - -
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
|
||||
# CHECK-NEXT: - - - - 2.00 - 2.00 - - - - - - - vperm2f128 $136, %ymm0, %ymm0, %ymm1
|
||||
# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - vperm2f128 $136, %ymm0, %ymm0, %ymm1
|
||||
# CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vaddps %ymm1, %ymm1, %ymm0
|
||||
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: 01234
|
||||
# CHECK-NEXT: 0
|
||||
# CHECK-NEXT: Index 0123456789
|
||||
|
||||
# CHECK: [0,0] DeER . . . vperm2f128 $136, %ymm0, %ymm0, %ymm1
|
||||
# CHECK-NEXT: [0,1] .DeeeER . . vaddps %ymm1, %ymm1, %ymm0
|
||||
# CHECK-NEXT: [1,0] . D==eER . . vperm2f128 $136, %ymm0, %ymm0, %ymm1
|
||||
# CHECK-NEXT: [1,1] . D==eeeER . vaddps %ymm1, %ymm1, %ymm0
|
||||
# CHECK-NEXT: [2,0] . D====eER . vperm2f128 $136, %ymm0, %ymm0, %ymm1
|
||||
# CHECK-NEXT: [2,1] . D====eeeER vaddps %ymm1, %ymm1, %ymm0
|
||||
# CHECK: [0,0] DeER . . vperm2f128 $136, %ymm0, %ymm0, %ymm1
|
||||
# CHECK-NEXT: [0,1] .DeeeER . vaddps %ymm1, %ymm1, %ymm0
|
||||
# CHECK-NEXT: [1,0] . DeE-R . vperm2f128 $136, %ymm0, %ymm0, %ymm1
|
||||
# CHECK-NEXT: [1,1] . DeeeER . vaddps %ymm1, %ymm1, %ymm0
|
||||
# CHECK-NEXT: [2,0] . DeE-R . vperm2f128 $136, %ymm0, %ymm0, %ymm1
|
||||
# CHECK-NEXT: [2,1] . DeeeER vaddps %ymm1, %ymm1, %ymm0
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
@ -393,5 +393,5 @@ vaddps %ymm1, %ymm1, %ymm0
|
||||
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
||||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 3 3.0 0.3 0.0 vperm2f128 $136, %ymm0, %ymm0, %ymm1
|
||||
# CHECK-NEXT: 1. 3 3.0 0.0 0.0 vaddps %ymm1, %ymm1, %ymm0
|
||||
# CHECK-NEXT: 0. 3 1.0 1.0 0.7 vperm2f128 $136, %ymm0, %ymm0, %ymm1
|
||||
# CHECK-NEXT: 1. 3 1.0 0.0 0.0 vaddps %ymm1, %ymm1, %ymm0
|
||||
|
Loading…
x
Reference in New Issue
Block a user