mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
Instruction selection priority fixes to remove the XMM/XMMInt/orAVX predicates. Another commit will remove the orAVX functions from X86Subtarget.
llvm-svn: 147841
This commit is contained in:
parent
7855c5d08f
commit
c9756440ea
@ -437,33 +437,26 @@ def IST_FP64m : FPI<0xDF, MRM7m, (outs), (ins i64mem:$dst), "fistp{ll}\t$dst">;
|
||||
}
|
||||
|
||||
// FISTTP requires SSE3 even though it's a FPStack op.
|
||||
let Predicates = [HasSSE3] in {
|
||||
def ISTT_Fp16m32 : FpI_<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP,
|
||||
[(X86fp_to_i16mem RFP32:$src, addr:$op)]>,
|
||||
Requires<[HasSSE3orAVX]>;
|
||||
[(X86fp_to_i16mem RFP32:$src, addr:$op)]>;
|
||||
def ISTT_Fp32m32 : FpI_<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP,
|
||||
[(X86fp_to_i32mem RFP32:$src, addr:$op)]>,
|
||||
Requires<[HasSSE3orAVX]>;
|
||||
[(X86fp_to_i32mem RFP32:$src, addr:$op)]>;
|
||||
def ISTT_Fp64m32 : FpI_<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP,
|
||||
[(X86fp_to_i64mem RFP32:$src, addr:$op)]>,
|
||||
Requires<[HasSSE3orAVX]>;
|
||||
[(X86fp_to_i64mem RFP32:$src, addr:$op)]>;
|
||||
def ISTT_Fp16m64 : FpI_<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP,
|
||||
[(X86fp_to_i16mem RFP64:$src, addr:$op)]>,
|
||||
Requires<[HasSSE3orAVX]>;
|
||||
[(X86fp_to_i16mem RFP64:$src, addr:$op)]>;
|
||||
def ISTT_Fp32m64 : FpI_<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP,
|
||||
[(X86fp_to_i32mem RFP64:$src, addr:$op)]>,
|
||||
Requires<[HasSSE3orAVX]>;
|
||||
[(X86fp_to_i32mem RFP64:$src, addr:$op)]>;
|
||||
def ISTT_Fp64m64 : FpI_<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP,
|
||||
[(X86fp_to_i64mem RFP64:$src, addr:$op)]>,
|
||||
Requires<[HasSSE3orAVX]>;
|
||||
[(X86fp_to_i64mem RFP64:$src, addr:$op)]>;
|
||||
def ISTT_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP,
|
||||
[(X86fp_to_i16mem RFP80:$src, addr:$op)]>,
|
||||
Requires<[HasSSE3orAVX]>;
|
||||
[(X86fp_to_i16mem RFP80:$src, addr:$op)]>;
|
||||
def ISTT_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP,
|
||||
[(X86fp_to_i32mem RFP80:$src, addr:$op)]>,
|
||||
Requires<[HasSSE3orAVX]>;
|
||||
[(X86fp_to_i32mem RFP80:$src, addr:$op)]>;
|
||||
def ISTT_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP,
|
||||
[(X86fp_to_i64mem RFP80:$src, addr:$op)]>,
|
||||
Requires<[HasSSE3orAVX]>;
|
||||
[(X86fp_to_i64mem RFP80:$src, addr:$op)]>;
|
||||
} // Predicates = [HasSSE3]
|
||||
|
||||
let mayStore = 1 in {
|
||||
def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst">;
|
||||
|
@ -436,7 +436,7 @@ class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
// SS42FI - SSE 4.2 instructions with T8XD prefix.
|
||||
class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: I<o, F, outs, ins, asm, pattern>, T8XD, Requires<[HasSSE42orAVX]>;
|
||||
: I<o, F, outs, ins, asm, pattern>, T8XD, Requires<[HasSSE42]>;
|
||||
|
||||
// SS42AI = SSE 4.2 instructions with TA prefix
|
||||
class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
@ -569,11 +569,6 @@ class VRPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
// MMXIi8 - MMX instructions with ImmT == Imm8 and TB prefix.
|
||||
// MMXID - MMX instructions with XD prefix.
|
||||
// MMXIS - MMX instructions with XS prefix.
|
||||
// MMXPI - SSE 1 & 2 packed instructions for MMX with no AVX equivalents
|
||||
// MMXSDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix. No AVX equiv.
|
||||
// MMXSSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix. No AVX equiv.
|
||||
// MMXSS38I - SSSE3 instructions with T8 prefix for MMX registers. No AVX equiv.
|
||||
// MMXSS3AI - SSSE3 instructions with TA prefix for MMX registers. No AVX equiv.
|
||||
class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX]>;
|
||||
@ -595,21 +590,3 @@ class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX]>;
|
||||
|
||||
class MMXPI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
|
||||
Domain d>
|
||||
: I<o, F, outs, ins, asm, pattern, d> {
|
||||
let Predicates = !if(hasOpSizePrefix /* OpSize */, [HasXMMInt], [HasXMM]);
|
||||
}
|
||||
class MMXSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasXMMInt]>;
|
||||
class MMXSSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasXMMInt]>;
|
||||
class MMXSS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSSE3orAVX]>;
|
||||
class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSSE3orAVX]>;
|
||||
|
@ -470,14 +470,8 @@ def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
|
||||
def HasSSE41 : Predicate<"Subtarget->hasSSE41()">;
|
||||
def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
|
||||
def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
|
||||
|
||||
def HasAVX : Predicate<"Subtarget->hasAVX()">;
|
||||
def HasAVX2 : Predicate<"Subtarget->hasAVX2()">;
|
||||
def HasXMM : Predicate<"Subtarget->hasXMM()">;
|
||||
def HasXMMInt : Predicate<"Subtarget->hasXMMInt()">;
|
||||
def HasSSE3orAVX : Predicate<"Subtarget->hasSSE3orAVX()">;
|
||||
def HasSSSE3orAVX : Predicate<"Subtarget->hasSSSE3orAVX()">;
|
||||
def HasSSE42orAVX : Predicate<"Subtarget->hasSSE42orAVX()">;
|
||||
|
||||
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
|
||||
def HasAES : Predicate<"Subtarget->hasAES()">;
|
||||
@ -492,8 +486,8 @@ def HasFSGSBase : Predicate<"Subtarget->hasFSGSBase()">;
|
||||
def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">;
|
||||
def HasBMI : Predicate<"Subtarget->hasBMI()">;
|
||||
def HasBMI2 : Predicate<"Subtarget->hasBMI2()">;
|
||||
def FPStackf32 : Predicate<"!Subtarget->hasXMM()">;
|
||||
def FPStackf64 : Predicate<"!Subtarget->hasXMMInt()">;
|
||||
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
|
||||
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
|
||||
def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
|
||||
def In32BitMode : Predicate<"!Subtarget->is64Bit()">,
|
||||
AssemblerPredicate<"!Mode64Bit">;
|
||||
|
@ -60,14 +60,14 @@ let Constraints = "$src1 = $dst" in {
|
||||
/// Unary MMX instructions requiring SSSE3.
|
||||
multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr,
|
||||
Intrinsic IntId64> {
|
||||
def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||
[(set VR64:$dst, (IntId64 VR64:$src))]>;
|
||||
def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||
[(set VR64:$dst, (IntId64 VR64:$src))]>;
|
||||
|
||||
def rm64 : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||
[(set VR64:$dst,
|
||||
(IntId64 (bitconvert (memopmmx addr:$src))))]>;
|
||||
def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||
[(set VR64:$dst,
|
||||
(IntId64 (bitconvert (memopmmx addr:$src))))]>;
|
||||
}
|
||||
|
||||
/// Binary MMX instructions requiring SSSE3.
|
||||
@ -75,11 +75,11 @@ let ImmT = NoImm, Constraints = "$src1 = $dst" in {
|
||||
multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
|
||||
Intrinsic IntId64> {
|
||||
let isCommutable = 0 in
|
||||
def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst),
|
||||
def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
|
||||
(ins VR64:$src1, VR64:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
[(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>;
|
||||
def rm64 : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst),
|
||||
def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
|
||||
(ins VR64:$src1, i64mem:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
[(set VR64:$dst,
|
||||
@ -90,11 +90,11 @@ multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
|
||||
|
||||
/// PALIGN MMX instructions (require SSSE3).
|
||||
multiclass ssse3_palign_mm<string asm, Intrinsic IntId> {
|
||||
def R64irr : MMXSS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
|
||||
def R64irr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
|
||||
(ins VR64:$src1, VR64:$src2, i8imm:$src3),
|
||||
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
[(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>;
|
||||
def R64irm : MMXSS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
|
||||
def R64irm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
|
||||
(ins VR64:$src1, i64mem:$src2, i8imm:$src3),
|
||||
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
[(set VR64:$dst, (IntId VR64:$src1,
|
||||
@ -104,18 +104,18 @@ multiclass ssse3_palign_mm<string asm, Intrinsic IntId> {
|
||||
multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
|
||||
Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
|
||||
string asm, Domain d> {
|
||||
def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
|
||||
def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
|
||||
[(set DstRC:$dst, (Int SrcRC:$src))], d>;
|
||||
def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
|
||||
def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
|
||||
[(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>;
|
||||
}
|
||||
|
||||
multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
|
||||
RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
|
||||
PatFrag ld_frag, string asm, Domain d> {
|
||||
def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst),(ins DstRC:$src1, SrcRC:$src2),
|
||||
def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst),(ins DstRC:$src1, SrcRC:$src2),
|
||||
asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>;
|
||||
def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst),
|
||||
def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst),
|
||||
(ins DstRC:$src1, x86memop:$src2), asm,
|
||||
[(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>;
|
||||
}
|
||||
@ -175,24 +175,24 @@ def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
|
||||
"movq\t{$src, $dst|$dst, $src}",
|
||||
[(store (x86mmx VR64:$src), addr:$dst)]>;
|
||||
|
||||
def MMX_MOVDQ2Qrr : MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
|
||||
def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
|
||||
(ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}",
|
||||
[(set VR64:$dst,
|
||||
(x86mmx (bitconvert
|
||||
(i64 (vector_extract (v2i64 VR128:$src),
|
||||
(iPTR 0))))))]>;
|
||||
|
||||
def MMX_MOVQ2DQrr : MMXSSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst),
|
||||
def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
(v2i64 (scalar_to_vector
|
||||
(i64 (bitconvert (x86mmx VR64:$src))))))]>;
|
||||
|
||||
let neverHasSideEffects = 1 in
|
||||
def MMX_MOVQ2FR64rr: MMXSSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
|
||||
def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
|
||||
(ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", []>;
|
||||
|
||||
def MMX_MOVFR642Qrr: MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
|
||||
def MMX_MOVFR642Qrr: SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
|
||||
(ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}", []>;
|
||||
|
||||
def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
|
||||
|
@ -171,7 +171,7 @@ def : Pat<(v4f64 (scalar_to_vector FR64:$src)),
|
||||
|
||||
// Bitcasts between 128-bit vector types. Return the original type since
|
||||
// no instruction is needed for the conversion
|
||||
let Predicates = [HasXMMInt] in {
|
||||
let Predicates = [HasSSE2] in {
|
||||
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
|
||||
def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
|
||||
def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
|
||||
@ -244,9 +244,9 @@ let Predicates = [HasAVX] in {
|
||||
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
|
||||
isPseudo = 1 in {
|
||||
def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
|
||||
[(set FR32:$dst, fp32imm0)]>, Requires<[HasXMM]>;
|
||||
[(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1]>;
|
||||
def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
|
||||
[(set FR64:$dst, fpimm0)]>, Requires<[HasXMMInt]>;
|
||||
[(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2]>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -1407,9 +1407,11 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
|
||||
|
||||
multiclass sse12_cvt_s_np<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
|
||||
X86MemOperand x86memop, string asm> {
|
||||
let neverHasSideEffects = 1 in {
|
||||
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, []>;
|
||||
let mayLoad = 1 in
|
||||
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, []>;
|
||||
} // neverHasSideEffects = 1
|
||||
}
|
||||
|
||||
multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
|
||||
@ -1423,12 +1425,14 @@ multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
|
||||
|
||||
multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
|
||||
X86MemOperand x86memop, string asm> {
|
||||
let neverHasSideEffects = 1 in {
|
||||
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
|
||||
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
|
||||
let mayLoad = 1 in
|
||||
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
|
||||
(ins DstRC:$src1, x86memop:$src),
|
||||
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
|
||||
} // neverHasSideEffects = 1
|
||||
}
|
||||
|
||||
defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
|
||||
@ -1459,7 +1463,7 @@ defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD,
|
||||
defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD,
|
||||
VEX_4V, VEX_W, VEX_LIG;
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
let Predicates = [HasAVX], AddedComplexity = 1 in {
|
||||
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
|
||||
(VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
|
||||
def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
|
||||
@ -1623,17 +1627,6 @@ defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/,
|
||||
SSEPackedSingle>, TB; /* PD SSE3 form is available */
|
||||
}
|
||||
|
||||
let Predicates = [HasSSE1] in {
|
||||
def : Pat<(int_x86_sse_cvtss2si VR128:$src),
|
||||
(CVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
|
||||
def : Pat<(int_x86_sse_cvtss2si (load addr:$src)),
|
||||
(CVTSS2SIrm addr:$src)>;
|
||||
def : Pat<(int_x86_sse_cvtss2si64 VR128:$src),
|
||||
(CVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
|
||||
def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)),
|
||||
(CVTSS2SI64rm addr:$src)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
def : Pat<(int_x86_sse_cvtss2si VR128:$src),
|
||||
(VCVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
|
||||
@ -1645,6 +1638,17 @@ let Predicates = [HasAVX] in {
|
||||
(VCVTSS2SI64rm addr:$src)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasSSE1] in {
|
||||
def : Pat<(int_x86_sse_cvtss2si VR128:$src),
|
||||
(CVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
|
||||
def : Pat<(int_x86_sse_cvtss2si (load addr:$src)),
|
||||
(CVTSS2SIrm addr:$src)>;
|
||||
def : Pat<(int_x86_sse_cvtss2si64 VR128:$src),
|
||||
(CVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
|
||||
def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)),
|
||||
(CVTSS2SI64rm addr:$src)>;
|
||||
}
|
||||
|
||||
/// SSE 2 Only
|
||||
|
||||
// Convert scalar double to scalar single
|
||||
@ -1844,6 +1848,7 @@ def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
|
||||
|
||||
// Convert with truncation packed single/double fp to doubleword
|
||||
// SSE2 packed instructions with XS prefix
|
||||
let neverHasSideEffects = 1 in {
|
||||
def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
"cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
|
||||
let mayLoad = 1 in
|
||||
@ -1854,14 +1859,7 @@ def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
|
||||
let mayLoad = 1 in
|
||||
def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
|
||||
"cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
|
||||
def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
"cvttps2dq\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
(int_x86_sse2_cvttps2dq VR128:$src))]>;
|
||||
def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
|
||||
"cvttps2dq\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
(int_x86_sse2_cvttps2dq (memop addr:$src)))]>;
|
||||
} // neverHasSideEffects = 1
|
||||
|
||||
def Int_VCVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
"vcvttps2dq\t{$src, $dst|$dst, $src}",
|
||||
@ -1874,12 +1872,14 @@ def Int_VCVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
|
||||
(memop addr:$src)))]>,
|
||||
XS, VEX, Requires<[HasAVX]>;
|
||||
|
||||
let Predicates = [HasSSE2] in {
|
||||
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
|
||||
(Int_CVTDQ2PSrr VR128:$src)>;
|
||||
def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
|
||||
(CVTTPS2DQrr VR128:$src)>;
|
||||
}
|
||||
def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
"cvttps2dq\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
(int_x86_sse2_cvttps2dq VR128:$src))]>;
|
||||
def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
|
||||
"cvttps2dq\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
(int_x86_sse2_cvttps2dq (memop addr:$src)))]>;
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
|
||||
@ -1892,6 +1892,13 @@ let Predicates = [HasAVX] in {
|
||||
(VCVTTPS2DQYrr VR256:$src)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasSSE2] in {
|
||||
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
|
||||
(Int_CVTDQ2PSrr VR128:$src)>;
|
||||
def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
|
||||
(CVTTPS2DQrr VR128:$src)>;
|
||||
}
|
||||
|
||||
def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
"cvttpd2dq\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
@ -3145,6 +3152,7 @@ let Predicates = [HasAVX] in {
|
||||
sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps>, VEX;
|
||||
}
|
||||
|
||||
let AddedComplexity = 1 in {
|
||||
def : Pat<(f32 (fsqrt FR32:$src)),
|
||||
(VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
|
||||
def : Pat<(f32 (fsqrt (load addr:$src))),
|
||||
@ -3167,8 +3175,9 @@ def : Pat<(f32 (X86frcp FR32:$src)),
|
||||
def : Pat<(f32 (X86frcp (load addr:$src))),
|
||||
(VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
|
||||
Requires<[HasAVX, OptForSize]>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
let Predicates = [HasAVX], AddedComplexity = 1 in {
|
||||
def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
|
||||
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
|
||||
(VSQRTSSr (f32 (IMPLICIT_DEF)),
|
||||
@ -3292,11 +3301,11 @@ def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
|
||||
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
|
||||
"movnti{l}\t{$src, $dst|$dst, $src}",
|
||||
[(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
|
||||
TB, Requires<[HasXMMInt]>;
|
||||
TB, Requires<[HasSSE2]>;
|
||||
def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
|
||||
"movnti{q}\t{$src, $dst|$dst, $src}",
|
||||
[(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
|
||||
TB, Requires<[HasXMMInt]>;
|
||||
TB, Requires<[HasSSE2]>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -3304,7 +3313,7 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Prefetch intrinsic.
|
||||
let Predicates = [HasXMM] in {
|
||||
let Predicates = [HasSSE1] in {
|
||||
def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src),
|
||||
"prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>, TB;
|
||||
def PREFETCHT1 : I<0x18, MRM2m, (outs), (ins i8mem:$src),
|
||||
@ -3318,7 +3327,7 @@ def PREFETCHNTA : I<0x18, MRM0m, (outs), (ins i8mem:$src),
|
||||
// Flush cache
|
||||
def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
|
||||
"clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
|
||||
TB, Requires<[HasXMMInt]>;
|
||||
TB, Requires<[HasSSE2]>;
|
||||
|
||||
// Pause. This "instruction" is encoded as "rep; nop", so even though it
|
||||
// was introduced with SSE2, it's backward compatible.
|
||||
@ -3326,11 +3335,11 @@ def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
|
||||
|
||||
// Load, store, and memory fence
|
||||
def SFENCE : I<0xAE, MRM_F8, (outs), (ins),
|
||||
"sfence", [(int_x86_sse_sfence)]>, TB, Requires<[HasXMM]>;
|
||||
"sfence", [(int_x86_sse_sfence)]>, TB, Requires<[HasSSE1]>;
|
||||
def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
|
||||
"lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasXMMInt]>;
|
||||
"lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
|
||||
def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
|
||||
"mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasXMMInt]>;
|
||||
"mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
|
||||
|
||||
def : Pat<(X86SFence), (SFENCE)>;
|
||||
def : Pat<(X86LFence), (LFENCE)>;
|
||||
@ -5475,18 +5484,18 @@ def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
|
||||
let usesCustomInserter = 1 in {
|
||||
def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
|
||||
[(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>,
|
||||
Requires<[HasSSE3orAVX]>;
|
||||
Requires<[HasSSE3]>;
|
||||
def MWAIT : PseudoI<(outs), (ins GR32:$src1, GR32:$src2),
|
||||
[(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>,
|
||||
Requires<[HasSSE3orAVX]>;
|
||||
Requires<[HasSSE3]>;
|
||||
}
|
||||
|
||||
let Uses = [EAX, ECX, EDX] in
|
||||
def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, TB,
|
||||
Requires<[HasSSE3orAVX]>;
|
||||
Requires<[HasSSE3]>;
|
||||
let Uses = [ECX, EAX] in
|
||||
def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", []>, TB,
|
||||
Requires<[HasSSE3orAVX]>;
|
||||
Requires<[HasSSE3]>;
|
||||
|
||||
def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>;
|
||||
def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>;
|
||||
|
@ -173,12 +173,12 @@ public:
|
||||
|
||||
bool hasCMov() const { return HasCMov; }
|
||||
bool hasMMX() const { return X86SSELevel >= MMX; }
|
||||
bool hasSSE1() const { return X86SSELevel >= SSE1 && !hasAVX(); }
|
||||
bool hasSSE2() const { return X86SSELevel >= SSE2 && !hasAVX(); }
|
||||
bool hasSSE3() const { return X86SSELevel >= SSE3 && !hasAVX(); }
|
||||
bool hasSSSE3() const { return X86SSELevel >= SSSE3 && !hasAVX(); }
|
||||
bool hasSSE41() const { return X86SSELevel >= SSE41 && !hasAVX(); }
|
||||
bool hasSSE42() const { return X86SSELevel >= SSE42 && !hasAVX(); }
|
||||
bool hasSSE1() const { return X86SSELevel >= SSE1; }
|
||||
bool hasSSE2() const { return X86SSELevel >= SSE2; }
|
||||
bool hasSSE3() const { return X86SSELevel >= SSE3; }
|
||||
bool hasSSSE3() const { return X86SSELevel >= SSSE3; }
|
||||
bool hasSSE41() const { return X86SSELevel >= SSE41; }
|
||||
bool hasSSE42() const { return X86SSELevel >= SSE42; }
|
||||
bool hasSSE4A() const { return HasSSE4A; }
|
||||
bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
|
||||
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
|
||||
|
Loading…
Reference in New Issue
Block a user