mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
[X86] Add WriteFMOVMSK/WriteVecMOVMSK/WriteMMXMOVMSK scheduler classes
Currently, MOVMSK instructions use the WriteVecLogic class, which is a very poor choice given that MOVMSK involves an SSE->GPR transfer.

Differential Revision: https://reviews.llvm.org/D44924
llvm-svn: 328664
This commit is contained in:
parent
5bf977267e
commit
92d89e3072
@ -635,7 +635,7 @@ def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
|
||||
"pmovmskb\t{$src, $dst|$dst, $src}",
|
||||
[(set GR32orGR64:$dst,
|
||||
(int_x86_mmx_pmovmskb VR64:$src))],
|
||||
IIC_MMX_MOVMSK>, Sched<[WriteVecLogic]>;
|
||||
IIC_MMX_MOVMSK>, Sched<[WriteMMXMOVMSK]>;
|
||||
|
||||
// Low word of XMM to MMX.
|
||||
def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1,
|
||||
|
@ -2593,7 +2593,7 @@ multiclass sse12_extr_sign_mask<RegisterClass RC, ValueType vt,
|
||||
def rr : PI<0x50, MRMSrcReg, (outs GR32orGR64:$dst), (ins RC:$src),
|
||||
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
|
||||
[(set GR32orGR64:$dst, (X86movmsk (vt RC:$src)))], IIC_SSE_MOVMSK, d>,
|
||||
Sched<[WriteVecLogic]>;
|
||||
Sched<[WriteFMOVMSK]>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
@ -4271,7 +4271,7 @@ defm PINSRW : sse2_pinsrw, PD;
|
||||
// SSE2 - Packed Mask Creation
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in {
|
||||
let ExeDomain = SSEPackedInt, SchedRW = [WriteVecMOVMSK] in {
|
||||
|
||||
def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
|
||||
(ins VR128:$src),
|
||||
@ -4283,8 +4283,8 @@ let Predicates = [HasAVX2] in {
|
||||
def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
|
||||
(ins VR256:$src),
|
||||
"pmovmskb\t{$src, $dst|$dst, $src}",
|
||||
[(set GR32orGR64:$dst, (X86movmsk (v32i8 VR256:$src)))]>,
|
||||
VEX, VEX_L, VEX_WIG;
|
||||
[(set GR32orGR64:$dst, (X86movmsk (v32i8 VR256:$src)))],
|
||||
IIC_SSE_MOVMSK>, VEX, VEX_L, VEX_WIG;
|
||||
}
|
||||
|
||||
def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src),
|
||||
|
@ -227,6 +227,11 @@ def : WriteRes<WritePCmpEStrILd, [BWPort0, BWPort5, BWPort23, BWPort0156]> {
|
||||
let ResourceCycles = [4,3,1,1];
|
||||
}
|
||||
|
||||
// MOVMSK Instructions.
|
||||
def : WriteRes<WriteFMOVMSK, [BWPort0]> { let Latency = 3; }
|
||||
def : WriteRes<WriteVecMOVMSK, [BWPort0]> { let Latency = 3; }
|
||||
def : WriteRes<WriteMMXMOVMSK, [BWPort0]> { let Latency = 1; }
|
||||
|
||||
// AES instructions.
|
||||
def : WriteRes<WriteAESDecEnc, [BWPort5]> { // Decryption, encryption.
|
||||
let Latency = 7;
|
||||
@ -297,7 +302,6 @@ def BWWriteResGroup1 : SchedWriteRes<[BWPort0]> {
|
||||
}
|
||||
def: InstRW<[BWWriteResGroup1], (instregex "MMX_MOVD64from64rr",
|
||||
"MMX_MOVD64grr",
|
||||
"MMX_PMOVMSKBrr",
|
||||
"MMX_PSLLDri",
|
||||
"MMX_PSLLDrr",
|
||||
"MMX_PSLLQri",
|
||||
@ -839,15 +843,6 @@ def: InstRW<[BWWriteResGroup25], (instregex "PUSH(16|32|64)rmr",
|
||||
"STOSQ",
|
||||
"STOSW")>;
|
||||
|
||||
def BWWriteResGroup26 : SchedWriteRes<[BWPort0]> {
|
||||
let Latency = 3;
|
||||
let NumMicroOps = 1;
|
||||
let ResourceCycles = [1];
|
||||
}
|
||||
def: InstRW<[BWWriteResGroup26], (instregex "(V?)MOVMSKPD(Y?)rr",
|
||||
"(V?)MOVMSKPS(Y?)rr",
|
||||
"(V?)PMOVMSKB(Y?)rr")>;
|
||||
|
||||
def BWWriteResGroup27 : SchedWriteRes<[BWPort1]> {
|
||||
let Latency = 3;
|
||||
let NumMicroOps = 1;
|
||||
|
@ -216,6 +216,11 @@ def : WriteRes<WritePCmpEStrILd, [HWPort0, HWPort5, HWPort23, HWPort0156]> {
|
||||
let ResourceCycles = [4,3,1,1];
|
||||
}
|
||||
|
||||
// MOVMSK Instructions.
|
||||
def : WriteRes<WriteFMOVMSK, [HWPort0]> { let Latency = 3; }
|
||||
def : WriteRes<WriteVecMOVMSK, [HWPort0]> { let Latency = 3; }
|
||||
def : WriteRes<WriteMMXMOVMSK, [HWPort0]> { let Latency = 1; }
|
||||
|
||||
// AES Instructions.
|
||||
def : WriteRes<WriteAESDecEnc, [HWPort5]> {
|
||||
let Latency = 7;
|
||||
@ -658,7 +663,6 @@ def HWWriteResGroup2 : SchedWriteRes<[HWPort0]> {
|
||||
}
|
||||
def: InstRW<[HWWriteResGroup2], (instregex "MMX_MOVD64from64rr",
|
||||
"MMX_MOVD64grr",
|
||||
"MMX_PMOVMSKBrr",
|
||||
"MMX_PSLLDri",
|
||||
"MMX_PSLLDrr",
|
||||
"MMX_PSLLQri",
|
||||
@ -1763,15 +1767,6 @@ def HWWriteResGroup48 : SchedWriteRes<[HWPort4,HWPort6,HWPort23,HWPort237,HWPort
|
||||
def: InstRW<[HWWriteResGroup48], (instregex "CALL(16|32|64)m",
|
||||
"FARCALL64")>;
|
||||
|
||||
def HWWriteResGroup49 : SchedWriteRes<[HWPort0]> {
|
||||
let Latency = 3;
|
||||
let NumMicroOps = 1;
|
||||
let ResourceCycles = [1];
|
||||
}
|
||||
def: InstRW<[HWWriteResGroup49], (instregex "(V?)MOVMSKPD(Y?)rr",
|
||||
"(V?)MOVMSKPS(Y?)rr",
|
||||
"(V?)PMOVMSKB(Y?)rr")>;
|
||||
|
||||
def HWWriteResGroup50 : SchedWriteRes<[HWPort1]> {
|
||||
let Latency = 3;
|
||||
let NumMicroOps = 1;
|
||||
|
@ -204,6 +204,11 @@ def : WriteRes<WritePCmpEStrILd, [SBPort015, SBPort23]> {
|
||||
let ResourceCycles = [7, 1];
|
||||
}
|
||||
|
||||
// MOVMSK Instructions.
|
||||
def : WriteRes<WriteFMOVMSK, [SBPort0]> { let Latency = 2; }
|
||||
def : WriteRes<WriteVecMOVMSK, [SBPort0]> { let Latency = 2; }
|
||||
def : WriteRes<WriteMMXMOVMSK, [SBPort0]> { let Latency = 1; }
|
||||
|
||||
// AES Instructions.
|
||||
def : WriteRes<WriteAESDecEnc, [SBPort5,SBPort015]> {
|
||||
let Latency = 7;
|
||||
@ -527,10 +532,7 @@ def SBWriteResGroup7 : SchedWriteRes<[SBPort0]> {
|
||||
let NumMicroOps = 1;
|
||||
let ResourceCycles = [1];
|
||||
}
|
||||
def: InstRW<[SBWriteResGroup7], (instregex "(V?)PMOVMSKBrr",
|
||||
"(V?)MOVMSKPD(Y?)rr",
|
||||
"(V?)MOVMSKPS(Y?)rr",
|
||||
"(V?)MOVPDI2DIrr",
|
||||
def: InstRW<[SBWriteResGroup7], (instregex "(V?)MOVPDI2DIrr",
|
||||
"(V?)MOVPQIto64rr")>;
|
||||
|
||||
def SBWriteResGroup9 : SchedWriteRes<[SBPort05]> {
|
||||
|
@ -224,6 +224,11 @@ def : WriteRes<WritePCmpEStrILd, [SKLPort0, SKLPort5, SKLPort23, SKLPort0156]> {
|
||||
let ResourceCycles = [4,3,1,1];
|
||||
}
|
||||
|
||||
// MOVMSK Instructions.
|
||||
def : WriteRes<WriteFMOVMSK, [SKLPort0]> { let Latency = 2; }
|
||||
def : WriteRes<WriteVecMOVMSK, [SKLPort0]> { let Latency = 2; }
|
||||
def : WriteRes<WriteMMXMOVMSK, [SKLPort0]> { let Latency = 2; }
|
||||
|
||||
// AES instructions.
|
||||
def : WriteRes<WriteAESDecEnc, [SKLPort0]> { // Decryption, encryption.
|
||||
let Latency = 4;
|
||||
@ -692,14 +697,10 @@ def SKLWriteResGroup12 : SchedWriteRes<[SKLPort0]> {
|
||||
}
|
||||
def: InstRW<[SKLWriteResGroup12], (instregex "MMX_MOVD64from64rr",
|
||||
"MMX_MOVD64grr",
|
||||
"MMX_PMOVMSKBrr",
|
||||
"(V?)COMISDrr",
|
||||
"(V?)COMISSrr",
|
||||
"(V?)MOVMSKPD(Y?)rr",
|
||||
"(V?)MOVMSKPS(Y?)rr",
|
||||
"(V?)MOVPDI2DIrr",
|
||||
"(V?)MOVPQIto64rr",
|
||||
"(V?)PMOVMSKB(Y?)rr",
|
||||
"VTESTPD(Y?)rr",
|
||||
"VTESTPS(Y?)rr",
|
||||
"(V?)UCOMISDrr",
|
||||
|
@ -224,6 +224,11 @@ def : WriteRes<WritePCmpEStrILd, [SKXPort0, SKXPort5, SKXPort23, SKXPort0156]> {
|
||||
let ResourceCycles = [4,3,1,1];
|
||||
}
|
||||
|
||||
// MOVMSK Instructions.
|
||||
def : WriteRes<WriteFMOVMSK, [SKXPort0]> { let Latency = 2; }
|
||||
def : WriteRes<WriteVecMOVMSK, [SKXPort0]> { let Latency = 2; }
|
||||
def : WriteRes<WriteMMXMOVMSK, [SKXPort0]> { let Latency = 2; }
|
||||
|
||||
// AES instructions.
|
||||
def : WriteRes<WriteAESDecEnc, [SKXPort0]> { // Decryption, encryption.
|
||||
let Latency = 4;
|
||||
@ -1444,28 +1449,18 @@ def: InstRW<[SKXWriteResGroup12], (instregex "COMISDrr",
|
||||
"COMISSrr",
|
||||
"MMX_MOVD64from64rr",
|
||||
"MMX_MOVD64grr",
|
||||
"MMX_PMOVMSKBrr",
|
||||
"MOVMSKPDrr",
|
||||
"MOVMSKPSrr",
|
||||
"MOVPDI2DIrr",
|
||||
"MOVPQIto64rr",
|
||||
"PMOVMSKBrr",
|
||||
"UCOMISDrr",
|
||||
"UCOMISSrr",
|
||||
"VCOMISDZrr(b?)",
|
||||
"VCOMISDrr",
|
||||
"VCOMISSZrr(b?)",
|
||||
"VCOMISSrr",
|
||||
"VMOVMSKPDYrr",
|
||||
"VMOVMSKPDrr",
|
||||
"VMOVMSKPSYrr",
|
||||
"VMOVMSKPSrr",
|
||||
"VMOVPDI2DIZrr(b?)(k?)(z?)",
|
||||
"VMOVPDI2DIrr",
|
||||
"VMOVPQIto64Zrr(b?)(k?)(z?)",
|
||||
"VMOVPQIto64rr",
|
||||
"VPMOVMSKBYrr",
|
||||
"VPMOVMSKBrr",
|
||||
"VTESTPDYrr",
|
||||
"VTESTPDrr",
|
||||
"VTESTPSYrr",
|
||||
|
@ -105,6 +105,11 @@ defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD.
|
||||
// These are often used on both floating point and integer vectors.
|
||||
defm WriteVecLogic : X86SchedWritePair; // Vector and/or/xor.
|
||||
|
||||
// MOVMSK operations.
|
||||
def WriteFMOVMSK : SchedWrite;
|
||||
def WriteVecMOVMSK : SchedWrite;
|
||||
def WriteMMXMOVMSK : SchedWrite;
|
||||
|
||||
// Conversion between integer and float.
|
||||
defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer.
|
||||
defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float.
|
||||
|
@ -358,6 +358,14 @@ defm : JWriteResFpuPair<WritePCmpIStrM, [JFPU1, JVALU1, JFPA, JALU0], 8, [1, 2,
|
||||
defm : JWriteResFpuPair<WritePCmpEStrI, [JFPU1, JSAGU, JLAGU, JVALU, JVALU1, JFPA, JALU0], 14, [1, 2, 2, 6, 4, 1, 1], 9>;
|
||||
defm : JWriteResFpuPair<WritePCmpEStrM, [JFPU1, JSAGU, JLAGU, JVALU, JVALU1, JFPA, JALU0], 14, [1, 2, 2, 6, 4, 1, 1], 9>;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// MOVMSK Instructions.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
def : WriteRes<WriteFMOVMSK, [JFPU0, JFPA, JALU0]> { let Latency = 3; }
|
||||
def : WriteRes<WriteVecMOVMSK, [JFPU0, JFPA, JALU0]> { let Latency = 3; }
|
||||
def : WriteRes<WriteMMXMOVMSK, [JFPU0, JFPA, JALU0]> { let Latency = 3; }
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// AES Instructions.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
@ -771,13 +779,6 @@ def JWriteVMaskMovYSt: SchedWriteRes<[JFPU01, JFPX, JSAGU]> {
|
||||
}
|
||||
def : InstRW<[JWriteVMaskMovYSt], (instrs VMASKMOVPDYmr, VMASKMOVPSYmr)>;
|
||||
|
||||
def JWriteVMOVMSK: SchedWriteRes<[JFPU0, JFPA, JALU0]> {
|
||||
let Latency = 3;
|
||||
}
|
||||
def : InstRW<[JWriteVMOVMSK], (instrs MOVMSKPDrr, VMOVMSKPDrr, VMOVMSKPDYrr,
|
||||
MOVMSKPSrr, VMOVMSKPSrr, VMOVMSKPSYrr,
|
||||
PMOVMSKBrr, VPMOVMSKBrr, MMX_PMOVMSKBrr)>;
|
||||
|
||||
def JWriteVTESTY: SchedWriteRes<[JFPU01, JFPX, JFPA, JALU0]> {
|
||||
let Latency = 4;
|
||||
let ResourceCycles = [2, 2, 2, 1];
|
||||
|
@ -185,6 +185,11 @@ def : WriteRes<WritePCmpEStrILd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
|
||||
let ResourceCycles = [21, 1];
|
||||
}
|
||||
|
||||
// MOVMSK Instructions.
|
||||
def : WriteRes<WriteFMOVMSK, [SLM_FPC_RSV1]> { let Latency = 4; }
|
||||
def : WriteRes<WriteVecMOVMSK, [SLM_FPC_RSV1]> { let Latency = 4; }
|
||||
def : WriteRes<WriteMMXMOVMSK, [SLM_FPC_RSV1]> { let Latency = 4; }
|
||||
|
||||
// AES Instructions.
|
||||
def : WriteRes<WriteAESDecEnc, [SLM_FPC_RSV0]> {
|
||||
let Latency = 8;
|
||||
|
@ -216,6 +216,11 @@ defm : ZnWriteResFpuPair<WriteShuffle256, [ZnFPU], 2>;
|
||||
// Vector Shift Operations
|
||||
defm : ZnWriteResFpuPair<WriteVarVecShift, [ZnFPU12], 1>;
|
||||
|
||||
// MOVMSK Instructions.
|
||||
def : WriteRes<WriteFMOVMSK, [ZnFPU2]>;
|
||||
def : WriteRes<WriteVecMOVMSK, [ZnFPU2]>;
|
||||
def : WriteRes<WriteMMXMOVMSK, [ZnFPU2]>;
|
||||
|
||||
// AES Instructions.
|
||||
defm : ZnWriteResFpuPair<WriteAESDecEnc, [ZnFPU01], 4>;
|
||||
defm : ZnWriteResFpuPair<WriteAESIMC, [ZnFPU01], 4>;
|
||||
@ -1004,14 +1009,12 @@ def : InstRW<[WriteMicrocoded],
|
||||
// m, v,v.
|
||||
def : InstRW<[WriteMicrocoded], (instregex "VPMASKMOV(D|Q)(Y?)mr")>;
|
||||
|
||||
// PMOVMSKB.
|
||||
def ZnWritePMOVMSKB : SchedWriteRes<[ZnFPU2]> {
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
// PMOVMSKBY.
|
||||
def ZnWritePMOVMSKBY : SchedWriteRes<[ZnFPU2]> {
|
||||
let NumMicroOps = 2;
|
||||
let Latency = 2;
|
||||
let ResourceCycles = [2];
|
||||
}
|
||||
def : InstRW<[ZnWritePMOVMSKB], (instregex "(V|MMX_)?PMOVMSKBrr")>;
|
||||
def : InstRW<[ZnWritePMOVMSKBY], (instregex "(V|MMX_)?PMOVMSKBYrr")>;
|
||||
|
||||
// PEXTR B/W/D/Q.
|
||||
@ -1150,11 +1153,6 @@ def : InstRW<[ZnWritePShiftY], (instregex "(V?)PS(R|L)LDQYri")>;
|
||||
//=== Floating Point XMM and YMM Instructions ===//
|
||||
//-- Move instructions --//
|
||||
|
||||
// MOVMSKP S/D.
|
||||
// r32 <- x,y.
|
||||
def ZnWriteMOVMSKPr : SchedWriteRes<[ZnFPU2]> ;
|
||||
def : InstRW<[ZnWriteMOVMSKPr], (instregex "(V?)MOVMSKP(S|D)(Y?)rr")>;
|
||||
|
||||
// VPERM2F128.
|
||||
def : InstRW<[WriteMicrocoded], (instregex "VPERM2F128rr")>;
|
||||
def : InstRW<[WriteMicrocoded], (instregex "VPERM2F128rm")>;
|
||||
|
@ -4096,7 +4096,7 @@ declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readn
|
||||
define i32 @test_pmovmskb(<32 x i8> %a0) {
|
||||
; GENERIC-LABEL: test_pmovmskb:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpmovmskb %ymm0, %eax # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00]
|
||||
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
@ -4126,7 +4126,7 @@ define i32 @test_pmovmskb(<32 x i8> %a0) {
|
||||
;
|
||||
; ZNVER1-LABEL: test_pmovmskb:
|
||||
; ZNVER1: # %bb.0:
|
||||
; ZNVER1-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00]
|
||||
; ZNVER1-NEXT: vpmovmskb %ymm0, %eax # sched: [2:2.00]
|
||||
; ZNVER1-NEXT: vzeroupper # sched: [100:?]
|
||||
; ZNVER1-NEXT: retq # sched: [1:0.50]
|
||||
%1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0)
|
||||
|
@ -4068,7 +4068,7 @@ define i32 @test_pmovmskb(x86_mmx %a0) optsize {
|
||||
;
|
||||
; SLM-LABEL: test_pmovmskb:
|
||||
; SLM: # %bb.0:
|
||||
; SLM-NEXT: pmovmskb %mm0, %eax # sched: [1:0.50]
|
||||
; SLM-NEXT: pmovmskb %mm0, %eax # sched: [4:1.00]
|
||||
; SLM-NEXT: retq # sched: [4:1.00]
|
||||
;
|
||||
; SANDY-LABEL: test_pmovmskb:
|
||||
|
@ -3098,7 +3098,7 @@ define i32 @test_movmskps(<4 x float> %a0) {
|
||||
;
|
||||
; SLM-LABEL: test_movmskps:
|
||||
; SLM: # %bb.0:
|
||||
; SLM-NEXT: movmskps %xmm0, %eax # sched: [1:0.50]
|
||||
; SLM-NEXT: movmskps %xmm0, %eax # sched: [4:1.00]
|
||||
; SLM-NEXT: retq # sched: [4:1.00]
|
||||
;
|
||||
; SANDY-SSE-LABEL: test_movmskps:
|
||||
|
@ -5107,7 +5107,7 @@ define i32 @test_movmskpd(<2 x double> %a0) {
|
||||
;
|
||||
; SLM-LABEL: test_movmskpd:
|
||||
; SLM: # %bb.0:
|
||||
; SLM-NEXT: movmskpd %xmm0, %eax # sched: [1:0.50]
|
||||
; SLM-NEXT: movmskpd %xmm0, %eax # sched: [4:1.00]
|
||||
; SLM-NEXT: retq # sched: [4:1.00]
|
||||
;
|
||||
; SANDY-SSE-LABEL: test_movmskpd:
|
||||
@ -9684,7 +9684,7 @@ define i32 @test_pmovmskb(<16 x i8> %a0) {
|
||||
;
|
||||
; SLM-LABEL: test_pmovmskb:
|
||||
; SLM: # %bb.0:
|
||||
; SLM-NEXT: pmovmskb %xmm0, %eax # sched: [1:0.50]
|
||||
; SLM-NEXT: pmovmskb %xmm0, %eax # sched: [4:1.00]
|
||||
; SLM-NEXT: retq # sched: [4:1.00]
|
||||
;
|
||||
; SANDY-SSE-LABEL: test_pmovmskb:
|
||||
|
Loading…
Reference in New Issue
Block a user