diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index 59ca1ba7c9d..12df08c6c0c 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -265,7 +265,8 @@ std::string sys::getHostCPUName() { case 54: // 32 nm Atom Midview return "atom"; - case 55: // Intel Atom Silvermont processors + // Atom Silvermont codes from the Intel software optimization guide. + case 55: case 74: case 77: return "slm"; diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index a183d3a3d97..dc4a7eab080 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -166,10 +166,17 @@ include "X86Schedule.td" def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom", "Intel Atom processors">; +def ProcIntelSLM : SubtargetFeature<"slm", "X86ProcFamily", "IntelSLM", + "Intel Silvermont processors">; class Proc Features> : ProcessorModel; +//class AtomProc Features> +// : ProcessorModel; +//class SLMProc Features> +// : ProcessorModel; + def : Proc<"generic", []>; def : Proc<"i386", []>; def : Proc<"i486", []>; @@ -209,15 +216,14 @@ def : ProcessorModel<"atom", AtomModel, FeatureLEAUsesAG, FeaturePadShortFunctions]>; -// Silvermont. -def : ProcessorModel<"slm", AtomModel, - [ProcIntelAtom, FeatureSSE42, FeatureCMPXCHG16B, - FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP, - FeatureSlowDivide, - FeatureCallRegIndirect, - FeatureLEAUsesAG, - FeaturePadShortFunctions]>; - +// Atom Silvermont. 
+def : ProcessorModel<"slm", SLMModel, [ProcIntelSLM, + FeatureSSE42, FeatureCMPXCHG16B, + FeatureMOVBE, FeaturePOPCNT, + FeaturePCLMUL, FeatureAES, + FeatureCallRegIndirect, + FeaturePRFCHW, + FeatureSlowBTMem]>; // "Arrandale" along with corei3 and corei5 def : ProcessorModel<"corei7", SandyBridgeModel, [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem, diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index ce91687524a..da2b9c10062 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -642,12 +642,12 @@ multiclass avx512_icmp_packed opc, string OpcodeStr, RegisterClass KRC, (outs KRC:$dst), (ins RC:$src1, RC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))], - IIC_SSE_CMPP_RR>, EVEX_4V; + IIC_SSE_ALU_F32P_RR>, EVEX_4V; def rm : AVX512BI, EVEX_4V; + IIC_SSE_ALU_F32P_RM>, EVEX_4V; } defm VPCMPEQDZ : avx512_icmp_packed<0x76, "vpcmpeqd", VK16, VR512, i512mem, @@ -677,19 +677,19 @@ multiclass avx512_icmp_cc opc, RegisterClass KRC, def rri : AVX512AIi8, EVEX_4V; + IIC_SSE_ALU_F32P_RR>, EVEX_4V; def rmi : AVX512AIi8, EVEX_4V; + imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V; // Accept explicit immediate argument form instead of comparison code. 
let neverHasSideEffects = 1 in { def rri_alt : AVX512AIi8, EVEX_4V; + asm_alt, [], IIC_SSE_ALU_F32P_RR>, EVEX_4V; def rmi_alt : AVX512AIi8, EVEX_4V; + asm_alt, [], IIC_SSE_ALU_F32P_RM>, EVEX_4V; } } @@ -732,10 +732,10 @@ multiclass avx512_cmp_packed; + asm_alt, [], IIC_SSE_ALU_F32P_RR, d>; def rmi_alt : PIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), - asm_alt, [], IIC_SSE_CMPP_RM, d>; + asm_alt, [], IIC_SSE_ALU_F32P_RM, d>; } } diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index 9ce02ba081c..04119753946 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -726,20 +726,25 @@ class BinOpRR_RFF opcode, string mnemonic, X86TypeInfo typeinfo, : BinOpRR; + EFLAGS))], IIC_BIN_CARRY_NONMEM>; // BinOpRR_Rev - Instructions like "add reg, reg, reg" (reversed encoding). -class BinOpRR_Rev opcode, string mnemonic, X86TypeInfo typeinfo> +class BinOpRR_Rev opcode, string mnemonic, X86TypeInfo typeinfo, + InstrItinClass itin = IIC_BIN_NONMEM> : ITy, + mnemonic, "{$src2, $dst|$dst, $src2}", [], itin>, Sched<[WriteALU]> { // The disassembler should know about this, but not the asmparser. let isCodeGenOnly = 1; let hasSideEffects = 0; } +// BinOpRR_RFF_Rev - Instructions like "adc reg, reg, reg" (reversed encoding). +class BinOpRR_RFF_Rev opcode, string mnemonic, X86TypeInfo typeinfo> + : BinOpRR_Rev; + // BinOpRR_F_Rev - Instructions like "cmp reg, reg" (reversed encoding). class BinOpRR_F_Rev opcode, string mnemonic, X86TypeInfo typeinfo> : ITy opcode, string mnemonic, X86TypeInfo typeinfo> // BinOpRM - Instructions like "add reg, reg, [mem]". class BinOpRM opcode, string mnemonic, X86TypeInfo typeinfo, - dag outlist, list pattern> + dag outlist, list pattern, + InstrItinClass itin = IIC_BIN_MEM> : ITy, + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>, Sched<[WriteALULd, ReadAfterLd]>; // BinOpRM_R - Instructions like "add reg, reg, [mem]".
@@ -786,14 +792,15 @@ class BinOpRM_RFF opcode, string mnemonic, X86TypeInfo typeinfo, : BinOpRM; + EFLAGS))], IIC_BIN_CARRY_MEM>; // BinOpRI - Instructions like "add reg, reg, imm". class BinOpRI opcode, string mnemonic, X86TypeInfo typeinfo, - Format f, dag outlist, list pattern> + Format f, dag outlist, list pattern, + InstrItinClass itin = IIC_BIN_NONMEM> : ITy, + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>, Sched<[WriteALU]> { let ImmT = typeinfo.ImmEncoding; } @@ -824,14 +831,15 @@ class BinOpRI_RFF opcode, string mnemonic, X86TypeInfo typeinfo, : BinOpRI; + EFLAGS))], IIC_BIN_CARRY_NONMEM>; // BinOpRI8 - Instructions like "add reg, reg, imm8". class BinOpRI8 opcode, string mnemonic, X86TypeInfo typeinfo, - Format f, dag outlist, list pattern> + Format f, dag outlist, list pattern, + InstrItinClass itin = IIC_BIN_NONMEM> : ITy, + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>, Sched<[WriteALU]> { let ImmT = Imm8; // Always 8-bit immediate. } @@ -863,14 +871,14 @@ class BinOpRI8_RFF opcode, string mnemonic, X86TypeInfo typeinfo, : BinOpRI8; + EFLAGS))], IIC_BIN_CARRY_NONMEM>; // BinOpMR - Instructions like "add [mem], reg". class BinOpMR opcode, string mnemonic, X86TypeInfo typeinfo, - list pattern> + list pattern, InstrItinClass itin = IIC_BIN_MEM> : ITy, + mnemonic, "{$src, $dst|$dst, $src}", pattern, itin>, Sched<[WriteALULd, WriteRMW]>; // BinOpMR_RMW - Instructions like "add [mem], reg". @@ -886,7 +894,7 @@ class BinOpMR_RMW_FF opcode, string mnemonic, X86TypeInfo typeinfo, : BinOpMR; + (implicit EFLAGS)], IIC_BIN_CARRY_MEM>; // BinOpMR_F - Instructions like "cmp [mem], reg". class BinOpMR_F opcode, string mnemonic, X86TypeInfo typeinfo, @@ -896,10 +904,11 @@ class BinOpMR_F opcode, string mnemonic, X86TypeInfo typeinfo, // BinOpMI - Instructions like "add [mem], imm". 
class BinOpMI pattern, bits<8> opcode = 0x80> + Format f, list pattern, bits<8> opcode = 0x80, + InstrItinClass itin = IIC_BIN_MEM> : ITy, + mnemonic, "{$src, $dst|$dst, $src}", pattern, itin>, Sched<[WriteALULd, WriteRMW]> { let ImmT = typeinfo.ImmEncoding; } @@ -917,7 +926,7 @@ class BinOpMI_RMW_FF; + (implicit EFLAGS)], 0x80, IIC_BIN_CARRY_MEM>; // BinOpMI_F - Instructions like "cmp [mem], imm". class BinOpMI_F pattern> + Format f, list pattern, + InstrItinClass itin = IIC_BIN_MEM> : ITy<0x82, f, typeinfo, (outs), (ins typeinfo.MemOperand:$dst, typeinfo.Imm8Operand:$src), - mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>, + mnemonic, "{$src, $dst|$dst, $src}", pattern, itin>, Sched<[WriteALULd, WriteRMW]> { let ImmT = Imm8; // Always 8-bit immediate. } @@ -951,7 +961,7 @@ class BinOpMI8_RMW_FF; + (implicit EFLAGS)], IIC_BIN_CARRY_MEM>; // BinOpMI8_F - Instructions like "cmp [mem], imm8". class BinOpMI8_F opcode, string mnemonic, X86TypeInfo typeinfo, - Register areg, string operands> + Register areg, string operands, + InstrItinClass itin = IIC_BIN_NONMEM> : ITy, Sched<[WriteALU]> { + mnemonic, operands, [], itin>, Sched<[WriteALU]> { let ImmT = typeinfo.ImmEncoding; let Uses = [areg]; let Defs = [areg, EFLAGS]; @@ -976,7 +987,8 @@ class BinOpAI opcode, string mnemonic, X86TypeInfo typeinfo, // and use EFLAGS. 
class BinOpAI_FF opcode, string mnemonic, X86TypeInfo typeinfo, Register areg, string operands> - : BinOpAI { + : BinOpAI { let Uses = [areg, EFLAGS]; } @@ -1070,10 +1082,10 @@ multiclass ArithBinOp_RFF BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, def NAME#64rr : BinOpRR_RFF; } // isCommutable - def NAME#8rr_REV : BinOpRR_Rev; - def NAME#16rr_REV : BinOpRR_Rev; - def NAME#32rr_REV : BinOpRR_Rev; - def NAME#64rr_REV : BinOpRR_Rev; + def NAME#8rr_REV : BinOpRR_RFF_Rev; + def NAME#16rr_REV : BinOpRR_RFF_Rev; + def NAME#32rr_REV : BinOpRR_RFF_Rev; + def NAME#64rr_REV : BinOpRR_RFF_Rev; def NAME#8rm : BinOpRM_RFF; def NAME#16rm : BinOpRM_RFF; diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td index 28954c65e89..4090550cfc0 100644 --- a/lib/Target/X86/X86InstrExtension.td +++ b/lib/Target/X86/X86InstrExtension.td @@ -14,26 +14,26 @@ let neverHasSideEffects = 1 in { let Defs = [AX], Uses = [AL] in def CBW : I<0x98, RawFrm, (outs), (ins), - "{cbtw|cbw}", []>, OpSize; // AX = signext(AL) + "{cbtw|cbw}", [], IIC_CBW>, OpSize; // AX = signext(AL) let Defs = [EAX], Uses = [AX] in def CWDE : I<0x98, RawFrm, (outs), (ins), - "{cwtl|cwde}", []>; // EAX = signext(AX) + "{cwtl|cwde}", [], IIC_CBW>; // EAX = signext(AX) let Defs = [AX,DX], Uses = [AX] in def CWD : I<0x99, RawFrm, (outs), (ins), - "{cwtd|cwd}", []>, OpSize; // DX:AX = signext(AX) + "{cwtd|cwd}", [], IIC_CBW>, OpSize; // DX:AX = signext(AX) let Defs = [EAX,EDX], Uses = [EAX] in def CDQ : I<0x99, RawFrm, (outs), (ins), - "{cltd|cdq}", []>; // EDX:EAX = signext(EAX) + "{cltd|cdq}", [], IIC_CBW>; // EDX:EAX = signext(EAX) let Defs = [RAX], Uses = [EAX] in def CDQE : RI<0x98, RawFrm, (outs), (ins), - "{cltq|cdqe}", []>; // RAX = signext(EAX) + "{cltq|cdqe}", [], IIC_CBW>; // RAX = signext(EAX) let Defs = [RAX,RDX], Uses = [RAX] in def CQO : RI<0x99, RawFrm, (outs), (ins), - "{cqto|cqo}", []>; // RDX:RAX = signext(RAX) + "{cqto|cqo}", [], IIC_CBW>; // RDX:RAX = signext(RAX) } diff --git 
a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index b50706c360f..fb07ed0818b 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -671,7 +671,7 @@ class AVX512FMA3 o, Format F, dag outs, dag ins, string asm, // AES8I // These use the same encoding as the SSE4.2 T8 and TA encodings. class AES8I o, Format F, dag outs, dag ins, string asm, - listpattern, InstrItinClass itin = NoItinerary> + listpattern, InstrItinClass itin = IIC_AES> : I, T8, Requires<[HasAES]>; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 961109fdf90..148ac6d32cc 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -977,53 +977,56 @@ let Defs = [EFLAGS] in { def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "bsf{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))], - IIC_BSF>, TB, OpSize, Sched<[WriteShift]>; + IIC_BIT_SCAN_REG>, TB, OpSize, Sched<[WriteShift]>; def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "bsf{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))], - IIC_BSF>, TB, OpSize, Sched<[WriteShiftLd]>; + IIC_BIT_SCAN_MEM>, TB, OpSize, Sched<[WriteShiftLd]>; def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "bsf{l}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))], IIC_BSF>, TB, + [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))], + IIC_BIT_SCAN_REG>, TB, Sched<[WriteShift]>; def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "bsf{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))], - IIC_BSF>, TB, Sched<[WriteShiftLd]>; + IIC_BIT_SCAN_MEM>, TB, Sched<[WriteShiftLd]>; def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "bsf{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))], - IIC_BSF>, TB, Sched<[WriteShift]>; + IIC_BIT_SCAN_REG>, TB, 
Sched<[WriteShift]>; def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "bsf{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))], - IIC_BSF>, TB, Sched<[WriteShiftLd]>; + IIC_BIT_SCAN_MEM>, TB, Sched<[WriteShiftLd]>; def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "bsr{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))], IIC_BSR>, + [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))], + IIC_BIT_SCAN_REG>, TB, OpSize, Sched<[WriteShift]>; def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "bsr{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))], - IIC_BSR>, TB, + IIC_BIT_SCAN_MEM>, TB, OpSize, Sched<[WriteShiftLd]>; def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "bsr{l}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))], IIC_BSR>, TB, + [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))], + IIC_BIT_SCAN_REG>, TB, Sched<[WriteShift]>; def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "bsr{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))], - IIC_BSR>, TB, Sched<[WriteShiftLd]>; + IIC_BIT_SCAN_MEM>, TB, Sched<[WriteShiftLd]>; def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "bsr{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))], IIC_BSR>, TB, + [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))], IIC_BIT_SCAN_REG>, TB, Sched<[WriteShift]>; def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "bsr{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))], - IIC_BSR>, TB, Sched<[WriteShiftLd]>; + IIC_BIT_SCAN_MEM>, TB, Sched<[WriteShiftLd]>; } // Defs = [EFLAGS] let SchedRW = [WriteMicrocoded] in { diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index cb129562aa9..8ab8e025bea 100644 --- a/lib/Target/X86/X86InstrMMX.td 
+++ b/lib/Target/X86/X86InstrMMX.td @@ -204,7 +204,7 @@ multiclass sse12_cvt_pint_3addr opc, RegisterClass SrcRC, //===----------------------------------------------------------------------===// def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms", - [(int_x86_mmx_emms)]>; + [(int_x86_mmx_emms)], IIC_MMX_EMMS>; //===----------------------------------------------------------------------===// // MMX Scalar Instructions diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 83dd320fb30..16cad1ed30e 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -151,6 +151,34 @@ def SSE_MOVU_ITINS : OpndItins< IIC_SSE_MOVU_P_RR, IIC_SSE_MOVU_P_RM >; +def SSE_DPPD_ITINS : OpndItins< + IIC_SSE_DPPD_RR, IIC_SSE_DPPD_RM +>; + +def SSE_DPPS_ITINS : OpndItins< + IIC_SSE_DPPS_RR, IIC_SSE_DPPS_RM +>; + +def DEFAULT_ITINS : OpndItins< + IIC_ALU_NONMEM, IIC_ALU_MEM +>; + +def SSE_EXTRACT_ITINS : OpndItins< + IIC_SSE_EXTRACTPS_RR, IIC_SSE_EXTRACTPS_RM +>; + +def SSE_INSERT_ITINS : OpndItins< + IIC_SSE_INSERTPS_RR, IIC_SSE_INSERTPS_RM +>; + +def SSE_MPSADBW_ITINS : OpndItins< + IIC_SSE_MPSADBW_RR, IIC_SSE_MPSADBW_RM +>; + +def SSE_PMULLD_ITINS : OpndItins< + IIC_SSE_PMULLD_RR, IIC_SSE_PMULLD_RM +>; + //===----------------------------------------------------------------------===// // SSE 1 & 2 Instructions Classes //===----------------------------------------------------------------------===// @@ -2307,7 +2335,7 @@ let Constraints = "$src1 = $dst" in { defm CMPSD : sse12_cmp_scalar, // same latency as 32 bit compare + SSE_ALU_F64S>, XD; } @@ -2342,7 +2370,7 @@ let Constraints = "$src1 = $dst" in { SSE_ALU_F32S>, XS; defm Int_CMPSD : sse12_cmp_scalar_int, // same latency as f32 + SSE_ALU_F64S>, XD; } @@ -2411,26 +2439,27 @@ let Defs = [EFLAGS] in { // sse12_cmp_packed - sse 1 & 2 compare packed instructions multiclass sse12_cmp_packed { + string asm_alt, Domain d, + OpndItins itins = SSE_ALU_F32P> { def rri : PIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm, [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))], - IIC_SSE_CMPP_RR, d>, + itins.rr, d>, Sched<[WriteFAdd]>; def rmi : PIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm, [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))], - IIC_SSE_CMPP_RM, d>, + itins.rm, d>, Sched<[WriteFAddLd, ReadAfterLd]>; // Accept explicit immediate argument form instead of comparison code. let neverHasSideEffects = 1 in { def rri_alt : PIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc), - asm_alt, [], IIC_SSE_CMPP_RR, d>, Sched<[WriteFAdd]>; + asm_alt, [], itins.rr, d>, Sched<[WriteFAdd]>; def rmi_alt : PIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), - asm_alt, [], IIC_SSE_CMPP_RM, d>, + asm_alt, [], itins.rm, d>, Sched<[WriteFAddLd, ReadAfterLd]>; } } @@ -2455,11 +2484,11 @@ let Constraints = "$src1 = $dst" in { defm CMPPS : sse12_cmp_packed, TB; + SSEPackedSingle, SSE_ALU_F32P>, TB; defm CMPPD : sse12_cmp_packed, TB, OpSize; + SSEPackedDouble, SSE_ALU_F64P>, TB, OpSize; } let Predicates = [HasAVX] in { @@ -3830,7 +3859,7 @@ defm PAVGB : PDI_binop_all_int<0xE0, "pavgb", int_x86_sse2_pavg_b, defm PAVGW : PDI_binop_all_int<0xE3, "pavgw", int_x86_sse2_pavg_w, int_x86_avx2_pavg_w, SSE_INTALU_ITINS_P, 1>; defm PSADBW : PDI_binop_all_int<0xF6, "psadbw", int_x86_sse2_psad_bw, - int_x86_avx2_psad_bw, SSE_INTALU_ITINS_P, 1>; + int_x86_avx2_psad_bw, SSE_PMADD, 1>; let Predicates = [HasAVX] in defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128, @@ -3974,12 +4003,14 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), "pslldq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))]>; + (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))], + IIC_SSE_INTSHDQ_P_RI>; def PSRLDQri : PDIi8<0x73, MRM3r, (outs VR128:$dst), 
(ins VR128:$src1, i32i8imm:$src2), "psrldq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))]>; + (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))], + IIC_SSE_INTSHDQ_P_RI>; // PSRADQri doesn't exist in SSE[1-3]. } } // Constraints = "$src1 = $dst" @@ -4063,14 +4094,14 @@ let Predicates = [HasAVX] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF>, VEX, Sched<[WriteShuffle]>; + IIC_SSE_PSHUF_RI>, VEX, Sched<[WriteShuffle]>; def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, + (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX, Sched<[WriteShuffleLd]>; } @@ -4081,14 +4112,14 @@ let Predicates = [HasAVX2] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF>, VEX, VEX_L, Sched<[WriteShuffle]>; + IIC_SSE_PSHUF_RI>, VEX, VEX_L, Sched<[WriteShuffle]>; def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2), !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode (bitconvert (memopv4i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L, + (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX, VEX_L, Sched<[WriteShuffleLd]>; } @@ -4099,14 +4130,14 @@ let Predicates = [UseSSE2] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF>, Sched<[WriteShuffle]>; + IIC_SSE_PSHUF_RI>, Sched<[WriteShuffle]>; def mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode 
(bitconvert (memopv2i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF>, + (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, Sched<[WriteShuffleLd]>; } } @@ -5382,7 +5413,7 @@ multiclass ssse3_palignr { !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffle]>; + [], IIC_SSE_PALIGNRR>, OpSize, Sched<[WriteShuffle]>; let mayLoad = 1 in def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), @@ -5390,7 +5421,7 @@ multiclass ssse3_palignr { !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>; + [], IIC_SSE_PALIGNRM>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>; } } @@ -5482,16 +5513,17 @@ def : InstAlias<"monitor\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORrrr)>, // SSE4.1 - Packed Move with Sign/Zero Extend //===----------------------------------------------------------------------===// -multiclass SS41I_binop_rm_int8 opc, string OpcodeStr, Intrinsic IntId> { +multiclass SS41I_binop_rm_int8 opc, string OpcodeStr, Intrinsic IntId, + OpndItins itins = DEFAULT_ITINS> { def rr : SS48I, OpSize; + [(set VR128:$dst, (IntId VR128:$src))], itins.rr>, OpSize; def rm : SS48I, - OpSize; + (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))], + itins.rm>, OpSize; } multiclass SS41I_binop_rm_int16_y opc, string OpcodeStr, @@ -5502,22 +5534,23 @@ multiclass SS41I_binop_rm_int16_y opc, string OpcodeStr, def Yrm : SS48I, OpSize; + [(set VR256:$dst, (IntId (load addr:$src)))]>, + OpSize; } let Predicates = [HasAVX] in { -defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>, - VEX; -defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd", int_x86_sse41_pmovsxwd>, - VEX; -defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, 
"vpmovsxdq", int_x86_sse41_pmovsxdq>, - VEX; -defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw", int_x86_sse41_pmovzxbw>, - VEX; -defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd", int_x86_sse41_pmovzxwd>, - VEX; -defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq", int_x86_sse41_pmovzxdq>, - VEX; +defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", + int_x86_sse41_pmovsxbw>, VEX; +defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd", + int_x86_sse41_pmovsxwd>, VEX; +defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq", + int_x86_sse41_pmovsxdq>, VEX; +defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw", + int_x86_sse41_pmovzxbw>, VEX; +defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd", + int_x86_sse41_pmovzxwd>, VEX; +defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq", + int_x86_sse41_pmovzxdq>, VEX; } let Predicates = [HasAVX2] in { @@ -5535,12 +5568,12 @@ defm VPMOVZXDQ : SS41I_binop_rm_int16_y<0x35, "vpmovzxdq", int_x86_avx2_pmovzxdq>, VEX, VEX_L; } -defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>; -defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>; -defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>; -defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw>; -defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd>; -defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq>; +defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw, SSE_INTALU_ITINS_P>; +defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd, SSE_INTALU_ITINS_P>; +defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq, SSE_INTALU_ITINS_P>; +defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw, SSE_INTALU_ITINS_P>; +defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd, SSE_INTALU_ITINS_P>; +defm PMOVZXDQ : 
SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq, SSE_INTALU_ITINS_P>; let Predicates = [HasAVX] in { // Common patterns involving scalar load. @@ -5655,15 +5688,17 @@ let Predicates = [UseSSE41] in { } -multiclass SS41I_binop_rm_int4 opc, string OpcodeStr, Intrinsic IntId> { +multiclass SS41I_binop_rm_int4 opc, string OpcodeStr, Intrinsic IntId, + OpndItins itins = DEFAULT_ITINS> { def rr : SS48I, OpSize; + [(set VR128:$dst, (IntId VR128:$src))], itins.rr>, OpSize; def rm : SS48I, + (IntId (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))], + itins.rm>, OpSize; } @@ -5702,10 +5737,14 @@ defm VPMOVZXWQ : SS41I_binop_rm_int8_y<0x34, "vpmovzxwq", int_x86_avx2_pmovzxwq>, VEX, VEX_L; } -defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>; -defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>; -defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>; -defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq>; +defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd, + SSE_INTALU_ITINS_P>; +defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq, + SSE_INTALU_ITINS_P>; +defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd, + SSE_INTALU_ITINS_P>; +defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq, + SSE_INTALU_ITINS_P>; let Predicates = [HasAVX] in { // Common patterns involving scalar load @@ -5733,7 +5772,8 @@ let Predicates = [UseSSE41] in { (PMOVZXWQrm addr:$src)>; } -multiclass SS41I_binop_rm_int2 opc, string OpcodeStr, Intrinsic IntId> { +multiclass SS41I_binop_rm_int2 opc, string OpcodeStr, Intrinsic IntId, + OpndItins itins = DEFAULT_ITINS> { def rr : SS48I, OpSize; @@ -5772,8 +5812,10 @@ defm VPMOVSXBQ : SS41I_binop_rm_int4_y<0x22, "vpmovsxbq", defm VPMOVZXBQ : SS41I_binop_rm_int4_y<0x32, "vpmovzxbq", int_x86_avx2_pmovzxbq>, VEX, VEX_L; } -defm PMOVSXBQ : 
SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>; -defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>; +defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq, + SSE_INTALU_ITINS_P>; +defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq, + SSE_INTALU_ITINS_P>; let Predicates = [HasAVX2] in { def : Pat<(v16i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWYrr VR128:$src)>; @@ -6115,20 +6157,22 @@ defm PEXTRQ : SS41I_extract64<0x16, "pextrq">; /// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory /// destination -multiclass SS41I_extractf32 opc, string OpcodeStr> { +multiclass SS41I_extractf32 opc, string OpcodeStr, + OpndItins itins = DEFAULT_ITINS> { def rr : SS4AIi8, + (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))], + itins.rr>, OpSize; def mr : SS4AIi8, OpSize; + addr:$dst)], itins.rm>, OpSize; } let ExeDomain = SSEPackedSingle in { @@ -6139,7 +6183,7 @@ let ExeDomain = SSEPackedSingle in { "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, OpSize, VEX; } - defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">; + defm EXTRACTPS : SS41I_extractf32<0x17, "extractps", SSE_EXTRACT_ITINS>; } // Also match an EXTRACTPS store when the store is done as f32 instead of i32. @@ -6239,7 +6283,8 @@ let Constraints = "$src1 = $dst" in // are optimized inserts that won't zero arbitrary elements in the destination // vector. The next one matches the intrinsic and could zero arbitrary elements // in the target vector. 
-multiclass SS41I_insertf32 opc, string asm, bit Is2Addr = 1> { +multiclass SS41I_insertf32 opc, string asm, bit Is2Addr = 1, + OpndItins itins = DEFAULT_ITINS> { def rr : SS4AIi8 opc, string asm, bit Is2Addr = 1> { !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, - (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>, + (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))], itins.rr>, OpSize; def rm : SS4AIi8 opc, string asm, bit Is2Addr = 1> { [(set VR128:$dst, (X86insrtps VR128:$src1, (v4f32 (scalar_to_vector (loadf32 addr:$src2))), - imm:$src3))]>, OpSize; + imm:$src3))], itins.rm>, OpSize; } let ExeDomain = SSEPackedSingle in { let Predicates = [HasAVX] in defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V; let Constraints = "$src1 = $dst" in - defm INSERTPS : SS41I_insertf32<0x21, "insertps">; + defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1, SSE_INSERT_ITINS>; } //===----------------------------------------------------------------------===// @@ -6283,7 +6328,8 @@ let ExeDomain = SSEPackedSingle in { (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>, + [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))], + IIC_SSE_ROUNDPS_REG>, OpSize; // Vector intrinsic operation, mem @@ -6292,7 +6338,8 @@ let ExeDomain = SSEPackedSingle in { !strconcat(OpcodeStr, "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, - (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>, + (V4F32Int (mem_frag32 addr:$src1),imm:$src2))], + IIC_SSE_ROUNDPS_MEM>, OpSize; } // ExeDomain = SSEPackedSingle @@ -6302,7 +6349,8 @@ let ExeDomain = SSEPackedDouble in { (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>, + [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))], + IIC_SSE_ROUNDPS_REG>, OpSize; // 
Vector intrinsic operation, mem @@ -6311,7 +6359,8 @@ let ExeDomain = SSEPackedDouble in { !strconcat(OpcodeStr, "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, - (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>, + (V2F64Int (mem_frag64 addr:$src1),imm:$src2))], + IIC_SSE_ROUNDPS_MEM>, OpSize; } // ExeDomain = SSEPackedDouble } @@ -6593,30 +6642,33 @@ defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>, let Defs = [EFLAGS], Predicates = [HasPOPCNT] in { def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "popcnt{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)]>, + [(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)], + IIC_SSE_POPCNT_RR>, OpSize, XS; def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "popcnt{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, (ctpop (loadi16 addr:$src))), - (implicit EFLAGS)]>, OpSize, XS; + (implicit EFLAGS)], IIC_SSE_POPCNT_RM>, OpSize, XS; def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "popcnt{l}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)]>, + [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)], + IIC_SSE_POPCNT_RR>, XS; def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "popcnt{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (ctpop (loadi32 addr:$src))), - (implicit EFLAGS)]>, XS; + (implicit EFLAGS)], IIC_SSE_POPCNT_RM>, XS; def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "popcnt{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)]>, + [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)], + IIC_SSE_POPCNT_RR>, XS; def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "popcnt{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (ctpop (loadi64 addr:$src))), - (implicit EFLAGS)]>, XS; + (implicit EFLAGS)], IIC_SSE_POPCNT_RM>, XS; } @@ -6644,14
+6696,16 @@ defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw", /// SS41I_binop_rm_int - Simple SSE 4.1 binary operator multiclass SS41I_binop_rm_int opc, string OpcodeStr, - Intrinsic IntId128, bit Is2Addr = 1> { + Intrinsic IntId128, bit Is2Addr = 1, + OpndItins itins = DEFAULT_ITINS> { let isCommutable = 1 in def rr : SS48I, OpSize; + [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))], + itins.rr>, OpSize; def rm : SS48I opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (IntId128 VR128:$src1, - (bitconvert (memopv2i64 addr:$src2))))]>, OpSize; + (bitconvert (memopv2i64 addr:$src2))))], + itins.rm>, OpSize; } /// SS41I_binop_rm_int_y - Simple SSE 4.1 binary operator @@ -6682,7 +6737,8 @@ multiclass SS41I_binop_rm_int_y opc, string OpcodeStr, /// SS48I_binop_rm - Simple SSE41 binary operator. multiclass SS48I_binop_rm opc, string OpcodeStr, SDNode OpNode, ValueType OpVT, RegisterClass RC, PatFrag memop_frag, - X86MemOperand x86memop, bit Is2Addr = 1> { + X86MemOperand x86memop, bit Is2Addr = 1, + OpndItins itins = DEFAULT_ITINS> { let isCommutable = 1 in def rr : SS48I; defm PMINSB : SS48I_binop_rm<0x38, "pminsb", X86smin, v16i8, VR128, - memopv2i64, i128mem>; + memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; defm PMINSD : SS48I_binop_rm<0x39, "pminsd", X86smin, v4i32, VR128, - memopv2i64, i128mem>; + memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; defm PMINUD : SS48I_binop_rm<0x3B, "pminud", X86umin, v4i32, VR128, - memopv2i64, i128mem>; + memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", X86umin, v8i16, VR128, - memopv2i64, i128mem>; + memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", X86smax, v16i8, VR128, - memopv2i64, i128mem>; + memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", X86smax, v4i32, VR128, - memopv2i64, i128mem>; + memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; 
defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", X86umax, v4i32, VR128, - memopv2i64, i128mem>; + memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", X86umax, v8i16, VR128, - memopv2i64, i128mem>; - defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>; + memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; + defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq, + 1, SSE_INTMUL_ITINS_P>; } let Predicates = [HasAVX] in { @@ -6785,15 +6842,16 @@ let Predicates = [HasAVX2] in { let Constraints = "$src1 = $dst" in { defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128, - memopv2i64, i128mem>; + memopv2i64, i128mem, 1, SSE_PMULLD_ITINS>; defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128, - memopv2i64, i128mem>; + memopv2i64, i128mem, 1, SSE_INTALUQ_ITINS_P>; } /// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate multiclass SS41I_binop_rmi_int opc, string OpcodeStr, Intrinsic IntId, RegisterClass RC, PatFrag memop_frag, - X86MemOperand x86memop, bit Is2Addr = 1> { + X86MemOperand x86memop, bit Is2Addr = 1, + OpndItins itins = DEFAULT_ITINS> { let isCommutable = 1 in def rri : SS4AIi8 opc, string OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>, + [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))], itins.rr>, OpSize; def rmi : SS4AIi8 opc, string OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set RC:$dst, (IntId RC:$src1, - (bitconvert (memop_frag addr:$src2)), imm:$src3))]>, + (bitconvert (memop_frag addr:$src2)), imm:$src3))], itins.rm>, OpSize; } @@ -6862,21 +6920,27 @@ let Constraints = "$src1 = $dst" in { let isCommutable = 0 in { let ExeDomain = SSEPackedSingle in defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps, - VR128, memopv4f32, f128mem>; + VR128, 
memopv4f32, f128mem, + 1, SSE_INTALU_ITINS_P>; let ExeDomain = SSEPackedDouble in defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd, - VR128, memopv2f64, f128mem>; + VR128, memopv2f64, f128mem, + 1, SSE_INTALU_ITINS_P>; defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw, - VR128, memopv2i64, i128mem>; + VR128, memopv2i64, i128mem, + 1, SSE_INTALU_ITINS_P>; defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw, - VR128, memopv2i64, i128mem>; + VR128, memopv2i64, i128mem, + 1, SSE_INTMUL_ITINS_P>; } let ExeDomain = SSEPackedSingle in defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps, - VR128, memopv4f32, f128mem>; + VR128, memopv4f32, f128mem, 1, + SSE_DPPS_ITINS>; let ExeDomain = SSEPackedDouble in defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd, - VR128, memopv2f64, f128mem>; + VR128, memopv2f64, f128mem, 1, + SSE_DPPD_ITINS>; } /// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators @@ -6981,13 +7045,14 @@ let Predicates = [HasAVX2] in { /// SS41I_ternary_int - SSE 4.1 ternary operator let Uses = [XMM0], Constraints = "$src1 = $dst" in { multiclass SS41I_ternary_int opc, string OpcodeStr, PatFrag mem_frag, - X86MemOperand x86memop, Intrinsic IntId> { + X86MemOperand x86memop, Intrinsic IntId, + OpndItins itins = DEFAULT_ITINS> { def rr0 : SS48I, - OpSize; + [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))], + itins.rr>, OpSize; def rm0 : SS48I, OpSize; + (bitconvert (mem_frag addr:$src2)), XMM0))], + itins.rm>, OpSize; } } @@ -7262,61 +7328,66 @@ let Constraints = "$src1 = $dst" in { "crc32{b}\t{$src2, $src1|$src1, $src2}", [(set GR32:$dst, (int_x86_sse42_crc32_32_8 GR32:$src1, - (load addr:$src2)))]>; + (load addr:$src2)))], IIC_CRC32_MEM>; def CRC32r32r8 : SS42FI<0xF0, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR8:$src2), "crc32{b}\t{$src2, $src1|$src1, $src2}", [(set GR32:$dst, - (int_x86_sse42_crc32_32_8 GR32:$src1, GR8:$src2))]>; + 
(int_x86_sse42_crc32_32_8 GR32:$src1, GR8:$src2))], + IIC_CRC32_REG>; def CRC32r32m16 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i16mem:$src2), "crc32{w}\t{$src2, $src1|$src1, $src2}", [(set GR32:$dst, (int_x86_sse42_crc32_32_16 GR32:$src1, - (load addr:$src2)))]>, + (load addr:$src2)))], IIC_CRC32_MEM>, OpSize; def CRC32r32r16 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR16:$src2), "crc32{w}\t{$src2, $src1|$src1, $src2}", [(set GR32:$dst, - (int_x86_sse42_crc32_32_16 GR32:$src1, GR16:$src2))]>, + (int_x86_sse42_crc32_32_16 GR32:$src1, GR16:$src2))], + IIC_CRC32_REG>, OpSize; def CRC32r32m32 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), "crc32{l}\t{$src2, $src1|$src1, $src2}", [(set GR32:$dst, (int_x86_sse42_crc32_32_32 GR32:$src1, - (load addr:$src2)))]>; + (load addr:$src2)))], IIC_CRC32_MEM>; def CRC32r32r32 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "crc32{l}\t{$src2, $src1|$src1, $src2}", [(set GR32:$dst, - (int_x86_sse42_crc32_32_32 GR32:$src1, GR32:$src2))]>; + (int_x86_sse42_crc32_32_32 GR32:$src1, GR32:$src2))], + IIC_CRC32_REG>; def CRC32r64m8 : SS42FI<0xF0, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i8mem:$src2), "crc32{b}\t{$src2, $src1|$src1, $src2}", [(set GR64:$dst, (int_x86_sse42_crc32_64_8 GR64:$src1, - (load addr:$src2)))]>, + (load addr:$src2)))], IIC_CRC32_MEM>, REX_W; def CRC32r64r8 : SS42FI<0xF0, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR8:$src2), "crc32{b}\t{$src2, $src1|$src1, $src2}", [(set GR64:$dst, - (int_x86_sse42_crc32_64_8 GR64:$src1, GR8:$src2))]>, + (int_x86_sse42_crc32_64_8 GR64:$src1, GR8:$src2))], + IIC_CRC32_REG>, REX_W; def CRC32r64m64 : SS42FI<0xF1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), "crc32{q}\t{$src2, $src1|$src1, $src2}", [(set GR64:$dst, (int_x86_sse42_crc32_64_64 GR64:$src1, - (load addr:$src2)))]>, + (load addr:$src2)))], IIC_CRC32_MEM>, REX_W; def CRC32r64r64 : SS42FI<0xF1, MRMSrcReg, (outs 
GR64:$dst), (ins GR64:$src1, GR64:$src2), "crc32{q}\t{$src2, $src1|$src1, $src2}", [(set GR64:$dst, - (int_x86_sse42_crc32_64_64 GR64:$src1, GR64:$src2))]>, + (int_x86_sse42_crc32_64_64 GR64:$src1, GR64:$src2))], + IIC_CRC32_REG>, REX_W; } @@ -7458,13 +7529,15 @@ def PCLMULQDQrr : PCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, - (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>; + (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))], + IIC_SSE_PCLMULQDQ_RR>; def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1, - (memopv2i64 addr:$src2), imm:$src3))]>; + (memopv2i64 addr:$src2), imm:$src3))], + IIC_SSE_PCLMULQDQ_RM>; } // Constraints = "$src1 = $dst" diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index ceb2e053b96..0556437b839 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -141,9 +141,12 @@ def IIC_IDIV64 : InstrItinClass; // neg/not/inc/dec def IIC_UNARY_REG : InstrItinClass; def IIC_UNARY_MEM : InstrItinClass; -// add/sub/and/or/xor/adc/sbc/cmp/test +// add/sub/and/or/xor/sbc/cmp/test def IIC_BIN_MEM : InstrItinClass; def IIC_BIN_NONMEM : InstrItinClass; +// adc/sbc +def IIC_BIN_CARRY_MEM : InstrItinClass; +def IIC_BIN_CARRY_NONMEM : InstrItinClass; // shift/rotate def IIC_SR : InstrItinClass; // shift double @@ -250,11 +253,11 @@ def IIC_SSE_INTSH_P_RR : InstrItinClass; def IIC_SSE_INTSH_P_RM : InstrItinClass; def IIC_SSE_INTSH_P_RI : InstrItinClass; -def IIC_SSE_CMPP_RR : InstrItinClass; -def IIC_SSE_CMPP_RM : InstrItinClass; +def IIC_SSE_INTSHDQ_P_RI : InstrItinClass; def IIC_SSE_SHUFP : InstrItinClass; -def IIC_SSE_PSHUF : InstrItinClass; +def IIC_SSE_PSHUF_RI : InstrItinClass; +def IIC_SSE_PSHUF_MI : 
InstrItinClass; def IIC_SSE_UNPCK : InstrItinClass; @@ -316,7 +319,8 @@ def IIC_SSE_PSIGN_RM : InstrItinClass; def IIC_SSE_PMADD : InstrItinClass; def IIC_SSE_PMULHRSW : InstrItinClass; -def IIC_SSE_PALIGNR : InstrItinClass; +def IIC_SSE_PALIGNRR : InstrItinClass; +def IIC_SSE_PALIGNRM : InstrItinClass; def IIC_SSE_MWAIT : InstrItinClass; def IIC_SSE_MONITOR : InstrItinClass; @@ -492,8 +496,8 @@ def IIC_PUSH_REG : InstrItinClass; def IIC_PUSH_F : InstrItinClass; def IIC_PUSH_A : InstrItinClass; def IIC_BSWAP : InstrItinClass; -def IIC_BSF : InstrItinClass; -def IIC_BSR : InstrItinClass; +def IIC_BIT_SCAN_MEM : InstrItinClass; +def IIC_BIT_SCAN_REG : InstrItinClass; def IIC_MOVS : InstrItinClass; def IIC_STOS : InstrItinClass; def IIC_SCAS : InstrItinClass; @@ -540,6 +544,33 @@ def IIC_BOUND : InstrItinClass; def IIC_ARPL_REG : InstrItinClass; def IIC_ARPL_MEM : InstrItinClass; def IIC_MOVBE : InstrItinClass; +def IIC_AES : InstrItinClass; +def IIC_BLEND_MEM : InstrItinClass; +def IIC_BLEND_NOMEM : InstrItinClass; +def IIC_CBW : InstrItinClass; +def IIC_CRC32_REG : InstrItinClass; +def IIC_CRC32_MEM : InstrItinClass; +def IIC_SSE_DPPD_RR : InstrItinClass; +def IIC_SSE_DPPD_RM : InstrItinClass; +def IIC_SSE_DPPS_RR : InstrItinClass; +def IIC_SSE_DPPS_RM : InstrItinClass; +def IIC_MMX_EMMS : InstrItinClass; +def IIC_SSE_EXTRACTPS_RR : InstrItinClass; +def IIC_SSE_EXTRACTPS_RM : InstrItinClass; +def IIC_SSE_INSERTPS_RR : InstrItinClass; +def IIC_SSE_INSERTPS_RM : InstrItinClass; +def IIC_SSE_MPSADBW_RR : InstrItinClass; +def IIC_SSE_MPSADBW_RM : InstrItinClass; +def IIC_SSE_PMULLD_RR : InstrItinClass; +def IIC_SSE_PMULLD_RM : InstrItinClass; +def IIC_SSE_ROUNDPS_REG : InstrItinClass; +def IIC_SSE_ROUNDPS_MEM : InstrItinClass; +def IIC_SSE_ROUNDPD_REG : InstrItinClass; +def IIC_SSE_ROUNDPD_MEM : InstrItinClass; +def IIC_SSE_POPCNT_RR : InstrItinClass; +def IIC_SSE_POPCNT_RM : InstrItinClass; +def IIC_SSE_PCLMULQDQ_RR : InstrItinClass; +def IIC_SSE_PCLMULQDQ_RM : 
InstrItinClass; def IIC_NOP : InstrItinClass; @@ -561,7 +592,7 @@ def IIC_NOP : InstrItinClass; // latencies. Since these latencies are not used for pipeline hazards, // they do not need to be exact. // -// The GenericModel contains no instruciton itineraries. +// The GenericModel contains no instruction itineraries. def GenericModel : SchedMachineModel { let IssueWidth = 4; let MicroOpBufferSize = 32; @@ -572,3 +603,4 @@ def GenericModel : SchedMachineModel { include "X86ScheduleAtom.td" include "X86SchedSandyBridge.td" include "X86SchedHaswell.td" +include "X86ScheduleSLM.td" diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td index 14a1471d027..ba72f29910f 100644 --- a/lib/Target/X86/X86ScheduleAtom.td +++ b/lib/Target/X86/X86ScheduleAtom.td @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file defines the itinerary class data for the Intel Atom (Bonnell) -// processors. +// This file defines the itinerary class data for the Intel Atom +// in order (Saltwell-32nm/Bonnell-45nm) processors. 
// //===----------------------------------------------------------------------===// @@ -79,9 +79,12 @@ def AtomItineraries : ProcessorItineraries< // neg/not/inc/dec InstrItinData] >, InstrItinData] >, - // add/sub/and/or/xor/adc/sbc/cmp/test + // add/sub/and/or/xor/cmp/test InstrItinData] >, InstrItinData] >, + // adc/sbc + InstrItinData] >, + InstrItinData] >, // shift/rotate InstrItinData] >, // shift double @@ -203,11 +206,11 @@ def AtomItineraries : ProcessorItineraries< InstrItinData] >, InstrItinData] >, - InstrItinData] >, - InstrItinData] >, + InstrItinData] >, InstrItinData] >, - InstrItinData] >, + InstrItinData] >, + InstrItinData] >, InstrItinData] >, @@ -278,7 +281,8 @@ def AtomItineraries : ProcessorItineraries< InstrItinData] >, InstrItinData] >, - InstrItinData] >, + InstrItinData] >, + InstrItinData] >, InstrItinData] >, InstrItinData] >, @@ -470,8 +474,8 @@ def AtomItineraries : ProcessorItineraries< InstrItinData] >, InstrItinData] >, - InstrItinData] >, - InstrItinData] >, + InstrItinData] >, + InstrItinData] >, InstrItinData] >, InstrItinData] >, InstrItinData] >, @@ -518,6 +522,8 @@ def AtomItineraries : ProcessorItineraries< InstrItinData] >, InstrItinData] >, InstrItinData] >, + InstrItinData] >, + InstrItinData] >, InstrItinData] > ]>; diff --git a/lib/Target/X86/X86ScheduleSLM.td b/lib/Target/X86/X86ScheduleSLM.td new file mode 100644 index 00000000000..6c2a3040240 --- /dev/null +++ b/lib/Target/X86/X86ScheduleSLM.td @@ -0,0 +1,668 @@ +//===- X86ScheduleSLM.td - X86 Atom Scheduling Definitions -*- tablegen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the Intel Atom +// (Silvermont) processor. 
+// +//===----------------------------------------------------------------------===// + +def IEC_RSV0 : FuncUnit; +def IEC_RSV1 : FuncUnit; +def FPC_RSV0 : FuncUnit; +def FPC_RSV1 : FuncUnit; +def MEC_RSV : FuncUnit; + + + + + + + + + + + + + + +def SLMItineraries : ProcessorItineraries< + [ IEC_RSV0, IEC_RSV1, FPC_RSV0, FPC_RSV1, MEC_RSV ], + [], [ + // [InstrStage] + // [InstrStage, InstrStage] + // [InstrStage] + // [InstrStage,InstrStage] + // + // Default is 1 cycle, IEC_RSV0 or IEC_RSV1 + //InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + // mul + InstrItinData] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<3, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData] >, + // imul by al, ax, eax, rax + InstrItinData] >, + InstrItinData, + InstrStage<6, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<6, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData] >, + // imul reg by reg|mem + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<3, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + InstrItinData] >, + // imul reg = reg/mem * imm + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + InstrItinData, + InstrStage<3, [MEC_RSV]>] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + // idiv - min latency + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + // div - min latency + InstrItinData] >, + InstrItinData, + InstrStage<25, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + // neg/not/inc/dec + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + // add/sub/and/or/xor/adc/sbc/cmp/test + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + // adc/sbb + InstrItinData] >, + InstrItinData, + 
InstrStage<2, [MEC_RSV]>] >, + // shift/rotate + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + // shift double + InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<2, [MEC_RSV]>] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<2, [MEC_RSV]>] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<2, [MEC_RSV]>] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + // cmov + InstrItinData, + InstrStage<2, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<2, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<2, [MEC_RSV]>] >, + InstrItinData] >, + // set + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData] >, + // jcc + InstrItinData] >, + // jcxz/jecxz/jrcxz + InstrItinData] >, + // jmp rel + InstrItinData] >, + // jmp indirect + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + // jmp far + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData] >, + // loop/loope/loopne + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + // call - all but reg/imm + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData] >, + //ret + InstrItinData] >, + InstrItinData] >, + //sign extension movs + InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData] >, + //zero extension movs + InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + + InstrItinData] >, + InstrItinData] >, + + // SSE binary operations + // arithmetic fp scalar + InstrItinData] >, + InstrItinData, + InstrStage<3, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<3, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<2, 
[MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<13, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<13, [MEC_RSV]>] >, + + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData, + InstrStage<6, [MEC_RSV]>] >, + + // arithmetic fp parallel + InstrItinData] >, + InstrItinData, + InstrStage<3, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<2, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<27, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<27, [MEC_RSV]>] >, + + // bitwise parallel + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + + // arithmetic int parallel + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + + // multiply int parallel + InstrItinData] >, + InstrItinData, + InstrStage<5, [MEC_RSV]>] >, + + // shift parallel + InstrItinData] >, + InstrItinData, + InstrStage<2, [MEC_RSV]>] >, + InstrItinData] >, + + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + + InstrItinData] >, + + InstrItinData] >, + InstrItinData, + InstrStage<26, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<13, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<26, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<13, [MEC_RSV]>] >, + + InstrItinData] >, + InstrItinData, + InstrStage<9, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + + 
InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + + InstrItinData] >, + + InstrItinData] >, + + InstrItinData] >, + InstrItinData, + InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >, + InstrItinData] >, + + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData, + InstrStage<6, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<9, [MEC_RSV]>] >, + InstrItinData, + InstrStage<9, [MEC_RSV]>] >, + InstrItinData, + InstrStage<9, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<5, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData] >, + + // conversions + // to/from PD ... 
+ InstrItinData] >, + InstrItinData, + InstrStage<5, [MEC_RSV]>] >, + // to/from PS except to/from PD and PS2PI + InstrItinData] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + + // MMX MOVs + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + // other MMX + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + // conversions + // from/to PD + InstrItinData] >, + InstrItinData] >, + // from/to PI + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] 
>, + InstrItinData] >, + + // System instructions + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + // worst case for mov REG_CRx + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + // LAR + InstrItinData] >, + InstrItinData] >, + // LSL + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + // push control register, segment registers + InstrItinData] >, + InstrItinData] >, + // pop control register, segment registers + InstrItinData] >, + InstrItinData] >, + // VERR, VERW + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + // WRMSR, RDMSR + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + // SMSW, LMSW + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData, + InstrStage<10, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + 
InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<5, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<5, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<6, [MEC_RSV]>] >, + InstrItinData, + InstrStage<6, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + InstrItinData, + InstrStage<10, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<3, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<12, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<15, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<11, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<5, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<10, [MEC_RSV]>] >, + + InstrItinData] > + ]>; + +// Silvermont machine model. +def SLMModel : SchedMachineModel { + let IssueWidth = 2; // Allows 2 instructions per scheduling group. 
+ let MinLatency = 1; // InstrStage cycles override MinLatency. + // OperandCycles may be used for expected latency. + let LoadLatency = 3; // Expected cycles, may be overridden by OperandCycles. + let HighLatency = 30;// Expected, may be overridden by OperandCycles. + + let Itineraries = SLMItineraries; +} diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 851ab63ee62..78c9a1a6a34 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -281,7 +281,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { ToggleFeature(X86::FeatureFastUAMem); } - // Set processor type. Currently only Atom is detected. + // Set processor type. Currently only Atom or Silvermont (SLM) is detected. if (Family == 6 && (Model == 28 || Model == 38 || Model == 39 || Model == 53 || Model == 54)) { @@ -290,6 +290,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { UseLeaForSP = true; ToggleFeature(X86::FeatureLeaForSP); } + else if (Family == 6 && + (Model == 55 || Model == 74 || Model == 77)) { + X86ProcFamily = IntelSLM; + } unsigned MaxExtLevel; X86_MC::GetCpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); @@ -451,7 +455,7 @@ void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { // new MCSchedModel is used. InitMCProcessorInfo(CPUName, FS); - if (X86ProcFamily == IntelAtom) + if (X86ProcFamily == IntelAtom || X86ProcFamily == IntelSLM) PostRAScheduler = true; InstrItins = getInstrItineraryForCPU(CPUName); diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 28aae20c25d..90378fcc171 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -50,7 +50,7 @@ protected: }; enum X86ProcFamilyEnum { - Others, IntelAtom + Others, IntelAtom, IntelSLM }; /// X86ProcFamily - X86 processor family: Intel Atom, and others