mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[X86] Split WriteVecLogic into XMM and YMM/ZMM scheduler classes
This removes all the WriteVecLogic InstRW overrides.

llvm-svn: 331258
This commit is contained in:
parent
17547d21dd
commit
1fb2ac7d07
@ -191,7 +191,8 @@ def : WriteRes<WriteVecStore, [BWPort237, BWPort4]>;
|
||||
def : WriteRes<WriteVecMove, [BWPort015]>;
|
||||
|
||||
defm : BWWriteResPair<WriteVecALU, [BWPort15], 1>; // Vector integer ALU op, no logicals.
|
||||
defm : BWWriteResPair<WriteVecLogic, [BWPort015], 1>; // Vector integer and/or/xor.
|
||||
defm : BWWriteResPair<WriteVecLogic, [BWPort015], 1, [1], 1, 5>; // Vector integer and/or/xor.
|
||||
defm : BWWriteResPair<WriteVecLogicY,[BWPort015], 1, [1], 1, 6>; // Vector integer and/or/xor (YMM/ZMM).
|
||||
defm : BWWriteResPair<WriteVecShift, [BWPort0], 1>; // Vector integer shifts.
|
||||
defm : BWWriteResPair<WriteVecIMul, [BWPort0], 5>; // Vector integer multiply.
|
||||
defm : BWWriteResPair<WritePMULLD, [BWPort0], 10, [2], 2, 5>; // PMULLD
|
||||
@ -1162,11 +1163,7 @@ def BWWriteResGroup77 : SchedWriteRes<[BWPort23,BWPort015]> {
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1,1];
|
||||
}
|
||||
def: InstRW<[BWWriteResGroup77], (instregex "VPANDNYrm",
|
||||
"VPANDYrm",
|
||||
"VPBLENDDYrmi",
|
||||
"VPORYrm",
|
||||
"VPXORYrm")>;
|
||||
def: InstRW<[BWWriteResGroup77], (instregex "VPBLENDDYrmi")>;
|
||||
|
||||
def BWWriteResGroup79 : SchedWriteRes<[BWPort5,BWPort23]> {
|
||||
let Latency = 7;
|
||||
|
@ -188,6 +188,7 @@ def : WriteRes<WriteVecMove, [HWPort015]>;
|
||||
|
||||
defm : HWWriteResPair<WriteVecShift, [HWPort0], 1>;
|
||||
defm : HWWriteResPair<WriteVecLogic, [HWPort015], 1, [1], 1, 6>;
|
||||
defm : HWWriteResPair<WriteVecLogicY,[HWPort015], 1, [1], 1, 7>;
|
||||
defm : HWWriteResPair<WriteVecALU, [HWPort15], 1>;
|
||||
defm : HWWriteResPair<WriteVecIMul, [HWPort0], 5>;
|
||||
defm : HWWriteResPair<WritePMULLD, [HWPort0], 10, [2], 2, 6>;
|
||||
@ -1071,11 +1072,7 @@ def HWWriteResGroup17_2 : SchedWriteRes<[HWPort23,HWPort015]> {
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1,1];
|
||||
}
|
||||
def: InstRW<[HWWriteResGroup17_2], (instregex "VPANDNYrm",
|
||||
"VPANDYrm",
|
||||
"VPBLENDDYrmi",
|
||||
"VPORYrm",
|
||||
"VPXORYrm")>;
|
||||
def: InstRW<[HWWriteResGroup17_2], (instregex "VPBLENDDYrmi")>;
|
||||
|
||||
def HWWriteResGroup18 : SchedWriteRes<[HWPort23,HWPort0156]> {
|
||||
let Latency = 6;
|
||||
|
@ -168,6 +168,7 @@ def : WriteRes<WriteVecMove, [SBPort05]>;
|
||||
|
||||
defm : SBWriteResPair<WriteVecShift, [SBPort5], 1>;
|
||||
defm : SBWriteResPair<WriteVecLogic, [SBPort015], 1, [1], 1, 6>;
|
||||
defm : SBWriteResPair<WriteVecLogicY,[SBPort015], 1, [1], 1, 7>;
|
||||
defm : SBWriteResPair<WriteVecALU, [SBPort1], 3>;
|
||||
defm : SBWriteResPair<WriteVecIMul, [SBPort0], 5>;
|
||||
defm : SBWriteResPair<WritePMULLD, [SBPort0], 5, [1], 1, 6>; // TODO this is probably wrong for 256/512-bit for the "generic" model
|
||||
|
@ -189,6 +189,7 @@ def : WriteRes<WriteVecMove, [SKLPort015]>;
|
||||
|
||||
defm : SKLWriteResPair<WriteVecALU, [SKLPort15], 1>; // Vector integer ALU op, no logicals.
|
||||
defm : SKLWriteResPair<WriteVecLogic, [SKLPort015], 1, [1], 1, 6>; // Vector integer and/or/xor.
|
||||
defm : SKLWriteResPair<WriteVecLogicY,[SKLPort015], 1, [1], 1, 7>; // Vector integer and/or/xor (YMM/ZMM).
|
||||
defm : SKLWriteResPair<WriteVecShift, [SKLPort0], 1>; // Vector integer shifts.
|
||||
defm : SKLWriteResPair<WriteVecIMul, [SKLPort0], 5>; // Vector integer multiply.
|
||||
defm : SKLWriteResPair<WritePMULLD, [SKLPort01], 10, [2], 2, 6>;
|
||||
@ -1609,17 +1610,13 @@ def: InstRW<[SKLWriteResGroup110], (instregex "VMASKMOVPDYrm",
|
||||
"VPADDDYrm",
|
||||
"VPADDQYrm",
|
||||
"VPADDWYrm",
|
||||
"VPANDNYrm",
|
||||
"VPANDYrm",
|
||||
"VPBLENDDYrmi",
|
||||
"VPMASKMOVDYrm",
|
||||
"VPMASKMOVQYrm",
|
||||
"VPORYrm",
|
||||
"VPSUBBYrm",
|
||||
"VPSUBDYrm",
|
||||
"VPSUBQYrm",
|
||||
"VPSUBWYrm",
|
||||
"VPXORYrm")>;
|
||||
"VPSUBWYrm")>;
|
||||
|
||||
def SKLWriteResGroup112 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
|
||||
let Latency = 8;
|
||||
|
@ -189,6 +189,7 @@ def : WriteRes<WriteVecMove, [SKXPort015]>;
|
||||
|
||||
defm : SKXWriteResPair<WriteVecALU, [SKXPort15], 1>; // Vector integer ALU op, no logicals.
|
||||
defm : SKXWriteResPair<WriteVecLogic, [SKXPort015], 1, [1], 1, 6>; // Vector integer and/or/xor.
|
||||
defm : SKXWriteResPair<WriteVecLogicY,[SKXPort015], 1, [1], 1, 7>; // Vector integer and/or/xor (YMM/ZMM).
|
||||
defm : SKXWriteResPair<WriteVecShift, [SKXPort0], 1>; // Vector integer shifts.
|
||||
defm : SKXWriteResPair<WriteVecIMul, [SKXPort0], 5>; // Vector integer multiply.
|
||||
defm : SKXWriteResPair<WritePMULLD, [SKXPort015], 10, [2], 2, 6>; // Vector integer multiply.
|
||||
@ -3034,16 +3035,6 @@ def: InstRW<[SKXWriteResGroup121], (instregex "VBLENDMPDZ256rm(b?)",
|
||||
"VPADDWYrm",
|
||||
"VPADDWZ256rm(b?)",
|
||||
"VPADDWZrm(b?)",
|
||||
"VPANDDZ256rm(b?)",
|
||||
"VPANDDZrm(b?)",
|
||||
"VPANDNDZ256rm(b?)",
|
||||
"VPANDNDZrm(b?)",
|
||||
"VPANDNQZ256rm(b?)",
|
||||
"VPANDNQZrm(b?)",
|
||||
"VPANDNYrm",
|
||||
"VPANDQZ256rm(b?)",
|
||||
"VPANDQZrm(b?)",
|
||||
"VPANDYrm",
|
||||
"VPBLENDDYrmi",
|
||||
"VPBLENDMBZ256rm(b?)",
|
||||
"VPBLENDMBZrm(b?)",
|
||||
@ -3059,11 +3050,6 @@ def: InstRW<[SKXWriteResGroup121], (instregex "VBLENDMPDZ256rm(b?)",
|
||||
"VPBROADCASTQZm(b?)",
|
||||
"VPMASKMOVDYrm",
|
||||
"VPMASKMOVQYrm",
|
||||
"VPORDZ256rm(b?)",
|
||||
"VPORDZrm(b?)",
|
||||
"VPORQZ256rm(b?)",
|
||||
"VPORQZrm(b?)",
|
||||
"VPORYrm",
|
||||
"VPSUBBYrm",
|
||||
"VPSUBBZ256rm(b?)",
|
||||
"VPSUBBZrm(b?)",
|
||||
@ -3078,12 +3064,7 @@ def: InstRW<[SKXWriteResGroup121], (instregex "VBLENDMPDZ256rm(b?)",
|
||||
"VPTERNLOGDZ256rm(b?)i",
|
||||
"VPTERNLOGDZrm(b?)i",
|
||||
"VPTERNLOGQZ256rm(b?)i",
|
||||
"VPTERNLOGQZrm(b?)i",
|
||||
"VPXORDZ256rm(b?)",
|
||||
"VPXORDZrm(b?)",
|
||||
"VPXORQZ256rm(b?)",
|
||||
"VPXORQZrm(b?)",
|
||||
"VPXORYrm")>;
|
||||
"VPTERNLOGQZrm(b?)i")>;
|
||||
|
||||
def SKXWriteResGroup123 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
|
||||
let Latency = 8;
|
||||
|
@ -126,6 +126,7 @@ def WriteVecStore : SchedWrite;
|
||||
def WriteVecMove : SchedWrite;
|
||||
defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals.
|
||||
defm WriteVecLogic : X86SchedWritePair; // Vector integer and/or/xor logicals.
|
||||
defm WriteVecLogicY: X86SchedWritePair; // Vector integer and/or/xor logicals (YMM/ZMM).
|
||||
defm WriteVecShift : X86SchedWritePair; // Vector integer shifts.
|
||||
defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply.
|
||||
defm WritePMULLD : X86SchedWritePair; // PMULLD
|
||||
@ -225,7 +226,7 @@ def SchedWriteVecALU
|
||||
: X86SchedWriteWidths<WriteVecALU, WriteVecALU, WriteVecALU, WriteVecALU>;
|
||||
def SchedWriteVecLogic
|
||||
: X86SchedWriteWidths<WriteVecLogic, WriteVecLogic,
|
||||
WriteVecLogic, WriteVecLogic>;
|
||||
WriteVecLogicY, WriteVecLogicY>;
|
||||
def SchedWriteVecShift
|
||||
: X86SchedWriteWidths<WriteVecShift, WriteVecShift,
|
||||
WriteVecShift, WriteVecShift>;
|
||||
|
@ -245,6 +245,7 @@ def : WriteRes<WriteVecMove, [AtomPort01]>;
|
||||
|
||||
defm : AtomWriteResPair<WriteVecALU, [AtomPort01], [AtomPort0], 1, 1>;
|
||||
defm : AtomWriteResPair<WriteVecLogic, [AtomPort01], [AtomPort0], 1, 1>;
|
||||
defm : AtomWriteResPair<WriteVecLogicY, [AtomPort01], [AtomPort0], 1, 1>;
|
||||
defm : AtomWriteResPair<WriteVecShift, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>;
|
||||
defm : AtomWriteResPair<WriteVecIMul, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
|
||||
defm : AtomWriteResPair<WritePMULLD, [AtomPort01], [AtomPort0], 1, 1>;
|
||||
|
@ -407,6 +407,7 @@ defm : JWriteResFpuPair<WriteVarShuffle, [JFPU01, JVALU], 2, [1, 4], 3>;
|
||||
defm : JWriteResFpuPair<WriteBlend, [JFPU01, JVALU], 1>;
|
||||
defm : JWriteResFpuPair<WriteVarBlend, [JFPU01, JVALU], 2, [1, 4], 3>;
|
||||
defm : JWriteResFpuPair<WriteVecLogic, [JFPU01, JVALU], 1>;
|
||||
defm : JWriteResFpuPair<WriteVecLogicY, [JFPU01, JVALU], 1>; // NOTE: Doesn't exist on Jaguar.
|
||||
defm : JWriteResFpuPair<WriteShuffle256, [JFPU01, JVALU], 1>;
|
||||
defm : JWriteResFpuPair<WriteVarShuffle256, [JFPU01, JVALU], 1>; // NOTE: Doesn't exist on Jaguar.
|
||||
defm : JWriteResFpuPair<WriteVarVecShift, [JFPU01, JVALU], 1>; // NOTE: Doesn't exist on Jaguar.
|
||||
|
@ -156,6 +156,7 @@ def : WriteRes<WriteVecMove, [SLM_FPC_RSV01]>;
|
||||
|
||||
defm : SLMWriteResPair<WriteVecShift, [SLM_FPC_RSV0], 1>;
|
||||
defm : SLMWriteResPair<WriteVecLogic, [SLM_FPC_RSV01], 1>;
|
||||
defm : SLMWriteResPair<WriteVecLogicY,[SLM_FPC_RSV01], 1>;
|
||||
defm : SLMWriteResPair<WriteVecALU, [SLM_FPC_RSV01], 1>;
|
||||
defm : SLMWriteResPair<WriteVecIMul, [SLM_FPC_RSV0], 4>;
|
||||
// FIXME: The below is closer to correct, but caused some perf regressions.
|
||||
|
@ -227,6 +227,7 @@ def : WriteRes<WriteVecLoad, [ZnAGU]> { let Latency = 8; }
|
||||
|
||||
defm : ZnWriteResFpuPair<WriteVecShift, [ZnFPU], 1>;
|
||||
defm : ZnWriteResFpuPair<WriteVecLogic, [ZnFPU], 1>;
|
||||
defm : ZnWriteResFpuPair<WriteVecLogicY, [ZnFPU], 1>;
|
||||
defm : ZnWriteResFpuPair<WritePHAdd, [ZnFPU], 1>;
|
||||
defm : ZnWriteResFpuPair<WriteVecALU, [ZnFPU], 1>;
|
||||
defm : ZnWriteResFpuPair<WriteVecIMul, [ZnFPU0], 4>;
|
||||
|
@ -1368,7 +1368,7 @@ define <4 x i64> @test_pand(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
|
||||
; GENERIC-LABEL: test_pand:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
|
||||
; GENERIC-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
|
||||
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
@ -1417,7 +1417,7 @@ define <4 x i64> @test_pandn(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
|
||||
; GENERIC-LABEL: test_pandn:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [7:0.50]
|
||||
; GENERIC-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
|
||||
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
@ -5039,7 +5039,7 @@ define <4 x i64> @test_por(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
|
||||
; GENERIC-LABEL: test_por:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
|
||||
; GENERIC-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
|
||||
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
@ -7065,7 +7065,7 @@ define <4 x i64> @test_pxor(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
|
||||
; GENERIC-LABEL: test_pxor:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
|
||||
; GENERIC-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
|
||||
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
|
@ -2829,7 +2829,7 @@ define <8 x float> @ubto8f32(<8 x i32> %a) {
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [7:0.50]
|
||||
; GENERIC-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:0.50]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: ubto8f32:
|
||||
|
Loading…
x
Reference in New Issue
Block a user