
[X86][AVX512] Extend support in scalar conversions

Add scalar FP-to-int conversion-with-truncation intrinsics,
add scalar FP32 from/to FP64 conversion intrinsics, and
add rounding-mode and SAE-mode encoding for these intrinsics.

Differential Revision: http://reviews.llvm.org/D12665

llvm-svn: 248117
Asaf Badouh 2015-09-20 14:31:19 +00:00
parent 6c78cd17ac
commit 4ce11a0a36
9 changed files with 2433 additions and 139 deletions
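As a usage sketch (not part of the commit itself): the new truncating conversion intrinsics take the source vector plus an i32 rounding-mode operand, where 4 selects the current rounding mode (FROUND_CURRENT) and 8 requests suppress-all-exceptions (FROUND_NO_EXC), matching the tests further down. The function name below is illustrative only:

define i32 @cvttss2si_example(<4 x float> %a) {
  ; default behaviour (FROUND_CURRENT), emitted as "vcvttss2si"
  %cur = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a, i32 4)
  ; suppress-all-exceptions form, emitted as "vcvttss2si {sae}, ..."
  %sae = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a, i32 8)
  %sum = add i32 %cur, %sae
  ret i32 %sum
}
declare i32 @llvm.x86.avx512.cvttss2si(<4 x float>, i32)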


@@ -3855,10 +3855,14 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
               Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_avx512_cvtss2usi64 : GCCBuiltin<"__builtin_ia32_cvtss2usi64">,
               Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_avx512_cvttss2usi : GCCBuiltin<"__builtin_ia32_cvttss2usi">,
-               Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_avx512_cvttss2usi64 : GCCBuiltin<"__builtin_ia32_cvttss2usi64">,
-               Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+  def int_x86_avx512_cvttss2si : GCCBuiltin<"__builtin_ia32_vcvttss2si32">,
+               Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_cvttss2si64 : GCCBuiltin<"__builtin_ia32_vcvttss2si64">,
+               Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_cvttss2usi : GCCBuiltin<"__builtin_ia32_vcvttss2usi32">,
+               Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_cvttss2usi64 : GCCBuiltin<"__builtin_ia32_vcvttss2usi64">,
+               Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_cvtusi2ss : GCCBuiltin<"__builtin_ia32_cvtusi2ss32">,
              Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
                         llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
@@ -3870,10 +3874,14 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
               Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_avx512_cvtsd2usi64 : GCCBuiltin<"__builtin_ia32_cvtsd2usi64">,
               Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_avx512_cvttsd2usi : GCCBuiltin<"__builtin_ia32_cvttsd2usi">,
-               Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_avx512_cvttsd2usi64 : GCCBuiltin<"__builtin_ia32_cvttsd2usi64">,
-               Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+  def int_x86_avx512_cvttsd2si : GCCBuiltin<"__builtin_ia32_vcvttsd2si32">,
+               Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_cvttsd2si64 : GCCBuiltin<"__builtin_ia32_vcvttsd2si64">,
+               Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_cvttsd2usi : GCCBuiltin<"__builtin_ia32_vcvttsd2usi32">,
+               Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_cvttsd2usi64 : GCCBuiltin<"__builtin_ia32_vcvttsd2usi64">,
+               Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_cvtusi2sd : GCCBuiltin<"__builtin_ia32_cvtusi2sd32">,
              Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
                         llvm_i32_ty], [IntrNoMem]>;
@@ -4222,6 +4230,18 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
                    [llvm_v8f64_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty],
                    [IntrNoMem]>;
def int_x86_avx512_mask_cvtsd2ss_round :
GCCBuiltin<"__builtin_ia32_cvtsd2ss_round">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cvtss2sd_round :
GCCBuiltin<"__builtin_ia32_cvtss2sd_round">,
Intrinsic<[llvm_v2f64_ty],
[ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem]>;
  def int_x86_avx512_mask_cvtpd2ps :
        GCCBuiltin<"__builtin_ia32_cvtpd2ps_mask">,
        Intrinsic<[llvm_v4f32_ty],


@@ -145,6 +145,8 @@ def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
// We map scalar types to the smallest (128-bit) vector type
// with the appropriate element type. This allows to use the same masking logic.
def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;
@@ -4598,50 +4600,55 @@ def : Pat<(f64 (uint_to_fp GR64:$src)),
//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from float/double to integer
//===----------------------------------------------------------------------===//
-multiclass avx512_cvt_s_int<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
-                            Intrinsic Int, Operand memop, ComplexPattern mem_cpat,
-                            string asm> {
-let hasSideEffects = 0 in {
+multiclass avx512_cvt_s_int_round<bits<8> opc, RegisterClass SrcRC,
+                                  RegisterClass DstRC, Intrinsic Int,
+                                  Operand memop, ComplexPattern mem_cpat, string asm> {
+let hasSideEffects = 0, Predicates = [HasAVX512] in {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
-             [(set DstRC:$dst, (Int SrcRC:$src))]>, EVEX, VEX_LIG,
-             Requires<[HasAVX512]>;
-  let mayLoad = 1 in
-  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
-            !strconcat(asm,"\t{$src, $dst|$dst, $src}"), []>, EVEX, VEX_LIG,
-            Requires<[HasAVX512]>;
-} // hasSideEffects = 0
+             [(set DstRC:$dst, (Int SrcRC:$src))]>, EVEX, VEX_LIG;
+  def rb : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc),
+             !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"), []>,
+             EVEX, VEX_LIG, EVEX_B, EVEX_RC;
+  let mayLoad = 1 in
+  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
+            !strconcat(asm,"\t{$src, $dst|$dst, $src}"), []>, EVEX, VEX_LIG;
+} // hasSideEffects = 0, Predicates = [HasAVX512]
}
-let Predicates = [HasAVX512] in {
// Convert float/double to signed/unsigned int 32/64
-defm VCVTSS2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse_cvtss2si,
+defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, VR128X, GR32, int_x86_sse_cvtss2si,
                                   ssmem, sse_load_f32, "cvtss2si">,
                                   XS, EVEX_CD8<32, CD8VT1>;
-defm VCVTSS2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse_cvtss2si64,
+defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, VR128X, GR64,
+                                   int_x86_sse_cvtss2si64,
                                   ssmem, sse_load_f32, "cvtss2si">,
                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
-defm VCVTSS2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtss2usi,
+defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, VR128X, GR32,
+                                   int_x86_avx512_cvtss2usi,
                                   ssmem, sse_load_f32, "cvtss2usi">,
                                   XS, EVEX_CD8<32, CD8VT1>;
-defm VCVTSS2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64,
+defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, VR128X, GR64,
                                   int_x86_avx512_cvtss2usi64, ssmem,
                                   sse_load_f32, "cvtss2usi">, XS, VEX_W,
                                   EVEX_CD8<32, CD8VT1>;
-defm VCVTSD2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si,
+defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si,
                                   sdmem, sse_load_f64, "cvtsd2si">,
                                   XD, EVEX_CD8<64, CD8VT1>;
-defm VCVTSD2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse2_cvtsd2si64,
+defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, VR128X, GR64,
+                                   int_x86_sse2_cvtsd2si64,
                                   sdmem, sse_load_f64, "cvtsd2si">,
                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VCVTSD2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtsd2usi,
+defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, VR128X, GR32,
+                                   int_x86_avx512_cvtsd2usi,
                                   sdmem, sse_load_f64, "cvtsd2usi">,
                                   XD, EVEX_CD8<64, CD8VT1>;
-defm VCVTSD2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64,
+defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, VR128X, GR64,
                                   int_x86_avx512_cvtsd2usi64, sdmem,
                                   sse_load_f64, "cvtsd2usi">, XD, VEX_W,
                                   EVEX_CD8<64, CD8VT1>;

-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1 , Predicates = [HasAVX512] in {
  defm Int_VCVTSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
            int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}",
            SSE_CVT_Scalar, 0>, XS, EVEX_4V;
@@ -4658,121 +4665,170 @@ let isCodeGenOnly = 1 {
  defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
            int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}",
            SSE_CVT_Scalar, 0>, XD, EVEX_4V;
-} // isCodeGenOnly = 1
+} // isCodeGenOnly = 1, Predicates = [HasAVX512]

// Convert float/double to signed/unsigned int 32/64 with truncation
-let isCodeGenOnly = 1 in {
-  defm Int_VCVTTSS2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse_cvttss2si,
-                                ssmem, sse_load_f32, "cvttss2si">,
-                                XS, EVEX_CD8<32, CD8VT1>;
-  defm Int_VCVTTSS2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64,
-                                int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
-                                "cvttss2si">, XS, VEX_W,
-                                EVEX_CD8<32, CD8VT1>;
-  defm Int_VCVTTSD2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse2_cvttsd2si,
-                                sdmem, sse_load_f64, "cvttsd2si">, XD,
-                                EVEX_CD8<64, CD8VT1>;
-  defm Int_VCVTTSD2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64,
-                                int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
-                                "cvttsd2si">, XD, VEX_W,
-                                EVEX_CD8<64, CD8VT1>;
-  defm Int_VCVTTSS2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32,
-                                int_x86_avx512_cvttss2usi, ssmem, sse_load_f32,
-                                "cvttss2usi">, XS, EVEX_CD8<32, CD8VT1>;
-  defm Int_VCVTTSS2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
-                                int_x86_avx512_cvttss2usi64, ssmem,
-                                sse_load_f32, "cvttss2usi">, XS, VEX_W,
-                                EVEX_CD8<32, CD8VT1>;
-  defm Int_VCVTTSD2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32,
-                                int_x86_avx512_cvttsd2usi,
-                                sdmem, sse_load_f64, "cvttsd2usi">, XD,
-                                EVEX_CD8<64, CD8VT1>;
-  defm Int_VCVTTSD2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
-                                int_x86_avx512_cvttsd2usi64, sdmem,
-                                sse_load_f64, "cvttsd2usi">, XD, VEX_W,
-                                EVEX_CD8<64, CD8VT1>;
-} // isCodeGenOnly = 1
-
-multiclass avx512_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
-                        SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
-                        string asm> {
-  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
-              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
-              [(set DstRC:$dst, (OpNode SrcRC:$src))]>, EVEX;
-  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
-              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
-              [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>, EVEX;
+multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
+                            X86VectorVTInfo _DstRC, SDNode OpNode,
+                            SDNode OpNodeRnd>{
+let Predicates = [HasAVX512] in {
+  def rr : SI<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
+              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+              [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>, EVEX;
+  def rb : SI<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
+              !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
+              []>, EVEX, EVEX_B;
+  def rm : SI<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.MemOp:$src),
+              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+              [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
+              EVEX;
+
+  let isCodeGenOnly = 1,hasSideEffects = 0 in {
+  def rr_Int : SI<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
+               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+               [(set _DstRC.RC:$dst, (OpNodeRnd _SrcRC.RC:$src,
+                                      (i32 FROUND_CURRENT)))]>, EVEX, VEX_LIG;
+  def rb_Int : SI<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
+               !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
+               [(set _DstRC.RC:$dst, (OpNodeRnd _SrcRC.RC:$src,
+                                      (i32 FROUND_NO_EXC)))]>,
+               EVEX,VEX_LIG , EVEX_B;
+  let mayLoad = 1 in
+  def rm_Int : SI<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
+               (ins _SrcRC.MemOp:$src),
+               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+               []>, EVEX, VEX_LIG;
+  } // isCodeGenOnly = 1, hasSideEffects = 0
+} //HasAVX512
}

-defm VCVTTSS2SIZ : avx512_cvt_s<0x2C, FR32X, GR32, fp_to_sint, f32mem,
-                                loadf32, "cvttss2si">, XS,
-                                EVEX_CD8<32, CD8VT1>;
-defm VCVTTSS2USIZ : avx512_cvt_s<0x78, FR32X, GR32, fp_to_uint, f32mem,
-                                loadf32, "cvttss2usi">, XS,
-                                EVEX_CD8<32, CD8VT1>;
-defm VCVTTSS2SI64Z : avx512_cvt_s<0x2C, FR32X, GR64, fp_to_sint, f32mem,
-                                loadf32, "cvttss2si">, XS, VEX_W,
-                                EVEX_CD8<32, CD8VT1>;
-defm VCVTTSS2USI64Z : avx512_cvt_s<0x78, FR32X, GR64, fp_to_uint, f32mem,
-                                loadf32, "cvttss2usi">, XS, VEX_W,
-                                EVEX_CD8<32, CD8VT1>;
-defm VCVTTSD2SIZ : avx512_cvt_s<0x2C, FR64X, GR32, fp_to_sint, f64mem,
-                                loadf64, "cvttsd2si">, XD,
-                                EVEX_CD8<64, CD8VT1>;
-defm VCVTTSD2USIZ : avx512_cvt_s<0x78, FR64X, GR32, fp_to_uint, f64mem,
-                                loadf64, "cvttsd2usi">, XD,
-                                EVEX_CD8<64, CD8VT1>;
-defm VCVTTSD2SI64Z : avx512_cvt_s<0x2C, FR64X, GR64, fp_to_sint, f64mem,
-                                loadf64, "cvttsd2si">, XD, VEX_W,
-                                EVEX_CD8<64, CD8VT1>;
-defm VCVTTSD2USI64Z : avx512_cvt_s<0x78, FR64X, GR64, fp_to_uint, f64mem,
-                                loadf64, "cvttsd2usi">, XD, VEX_W,
-                                EVEX_CD8<64, CD8VT1>;
+defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "cvttss2si", f32x_info, i32x_info,
+                                fp_to_sint,X86cvttss2IntRnd>,
+                                XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "cvttss2si", f32x_info, i64x_info,
+                                fp_to_sint,X86cvttss2IntRnd>,
+                                VEX_W, XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "cvttsd2si", f64x_info, i32x_info,
+                                fp_to_sint,X86cvttsd2IntRnd>,
+                                XD, EVEX_CD8<64, CD8VT1>;
+defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "cvttsd2si", f64x_info, i64x_info,
+                                fp_to_sint,X86cvttsd2IntRnd>,
+                                VEX_W, XD, EVEX_CD8<64, CD8VT1>;
+
+defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "cvttss2usi", f32x_info, i32x_info,
+                                fp_to_uint,X86cvttss2UIntRnd>,
+                                XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "cvttss2usi", f32x_info, i64x_info,
+                                fp_to_uint,X86cvttss2UIntRnd>,
+                                XS,VEX_W, EVEX_CD8<32, CD8VT1>;
+defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "cvttsd2usi", f64x_info, i32x_info,
+                                fp_to_uint,X86cvttsd2UIntRnd>,
+                                XD, EVEX_CD8<64, CD8VT1>;
+defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "cvttsd2usi", f64x_info, i64x_info,
+                                fp_to_uint,X86cvttsd2UIntRnd>,
+                                XD, VEX_W, EVEX_CD8<64, CD8VT1>;
+
+let Predicates = [HasAVX512] in {
+def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))),
+          (VCVTTSS2SIZrr_Int (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))),
+          (VCVTTSS2SI64Zrr_Int (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))),
+          (VCVTTSD2SIZrr_Int (COPY_TO_REGCLASS VR128X:$src, FR64X))>;
+def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))),
+          (VCVTTSD2SI64Zrr_Int (COPY_TO_REGCLASS VR128X:$src, FR64X))>;
} // HasAVX512
//===----------------------------------------------------------------------===//
// AVX-512 Convert form float to double and back
//===----------------------------------------------------------------------===//
-let hasSideEffects = 0 in {
-def VCVTSS2SDZrr : AVX512XSI<0x5A, MRMSrcReg, (outs FR64X:$dst),
-                             (ins FR32X:$src1, FR32X:$src2),
-                             "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                             []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
-let mayLoad = 1 in
-def VCVTSS2SDZrm : AVX512XSI<0x5A, MRMSrcMem, (outs FR64X:$dst),
-                             (ins FR32X:$src1, f32mem:$src2),
-                             "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                             []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>,
-                             EVEX_CD8<32, CD8VT1>;
-
-// Convert scalar double to scalar single
-def VCVTSD2SSZrr : AVX512XDI<0x5A, MRMSrcReg, (outs FR32X:$dst),
-                             (ins FR64X:$src1, FR64X:$src2),
-                             "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                             []>, EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2F]>;
-let mayLoad = 1 in
-def VCVTSD2SSZrm : AVX512XDI<0x5A, MRMSrcMem, (outs FR32X:$dst),
-                             (ins FR64X:$src1, f64mem:$src2),
-                             "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                             []>, EVEX_4V, VEX_LIG, VEX_W,
-                             Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_CD8<64, CD8VT1>;
+multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+                                X86VectorVTInfo _Src, SDNode OpNode> {
+  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+                        (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
+                        "$src2, $src1", "$src1, $src2",
+                        (_.VT (OpNode (_Src.VT _Src.RC:$src1),
+                                      (_Src.VT _Src.RC:$src2)))>,
+                        EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
+  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
+                        "$src2, $src1", "$src1, $src2",
+                        (_.VT (OpNode (_Src.VT _Src.RC:$src1),
+                                 (_Src.VT (scalar_to_vector
+                                           (_Src.ScalarLdFrag addr:$src2)))))>,
+                        EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
-def : Pat<(f64 (fextend FR32X:$src)), (VCVTSS2SDZrr FR32X:$src, FR32X:$src)>,
-          Requires<[HasAVX512]>;
-def : Pat<(fextend (loadf32 addr:$src)),
-          (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512]>;
-
-def : Pat<(extloadf32 addr:$src),
-          (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
+// Scalar Coversion with SAE - suppress all exceptions
+multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+                                    X86VectorVTInfo _Src, SDNode OpNodeRnd> {
+  defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+                        (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
+                        "{sae}, $src2, $src1", "$src1, $src2, {sae}",
+                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src1),
+                                         (_Src.VT _Src.RC:$src2),
+                                         (i32 FROUND_NO_EXC)))>,
+                        EVEX_4V, VEX_LIG, EVEX_B;
+}
+
+// Scalar Conversion with rounding control (RC)
+multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+                                   X86VectorVTInfo _Src, SDNode OpNodeRnd> {
+  defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+                        (ins _Src.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
+                        "$rc, $src2, $src1", "$src1, $src2, $rc",
+                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src1),
+                                         (_Src.VT _Src.RC:$src2), (i32 imm:$rc)))>,
+                        EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>,
+                        EVEX_B, EVEX_RC;
+}
+multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                                      SDNode OpNodeRnd, X86VectorVTInfo _src,
+                                      X86VectorVTInfo _dst> {
+  let Predicates = [HasAVX512] in {
+    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode>,
+             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
+                                     OpNodeRnd>, VEX_W, EVEX_CD8<64, CD8VT1>,
+                                     EVEX_V512, XD;
+  }
+}
+
+multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                                      SDNode OpNodeRnd, X86VectorVTInfo _src,
+                                      X86VectorVTInfo _dst> {
+  let Predicates = [HasAVX512] in {
+    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode>,
+             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd>,
+             EVEX_CD8<32, CD8VT1>, XS, EVEX_V512;
+  }
+}
+defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86fround,
+                                         X86froundRnd, f64x_info, f32x_info>;
+defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpext,
+                                         X86fpextRnd,f32x_info, f64x_info >;
+
+def : Pat<(f64 (fextend FR32X:$src)),
+          (COPY_TO_REGCLASS (VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, VR128X),
+                               (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X)>,
+          Requires<[HasAVX512]>;
+def : Pat<(f64 (fextend (loadf32 addr:$src))),
+          (COPY_TO_REGCLASS (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
+          Requires<[HasAVX512]>;
+
+def : Pat<(f64 (extloadf32 addr:$src)),
+          (COPY_TO_REGCLASS (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
          Requires<[HasAVX512, OptForSize]>;
-def : Pat<(extloadf32 addr:$src),
-          (VCVTSS2SDZrr (f32 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
-          Requires<[HasAVX512, OptForSpeed]>;
-
-def : Pat<(f32 (fround FR64X:$src)), (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>,
+def : Pat<(f64 (extloadf32 addr:$src)),
+          (COPY_TO_REGCLASS (VCVTSS2SDZrr (v4f32 (IMPLICIT_DEF)),
+                    (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)), VR128X)>,
+          Requires<[HasAVX512, OptForSpeed]>;
+
+def : Pat<(f32 (fround FR64X:$src)),
+          (COPY_TO_REGCLASS (VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, VR128X),
+                    (COPY_TO_REGCLASS FR64X:$src, VR128X)), VR128X)>,
          Requires<[HasAVX512]>;
//===----------------------------------------------------------------------===//
// AVX-512 Vector convert from signed/unsigned integer to float/double
// and from float/double to signed/unsigned integer


@@ -137,6 +137,35 @@ def X86vfpround: SDNode<"X86ISD::VFPROUND",
                                      SDTCisFP<0>, SDTCisFP<1>,
                                      SDTCisOpSmallerThanOp<0, 1>]>>;
def X86fround: SDNode<"X86ISD::VFPROUND",
SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>,SDTCisFP<2>,
SDTCVecEltisVT<0, f32>,
SDTCVecEltisVT<1, f64>,
SDTCVecEltisVT<2, f64>,
SDTCisOpSmallerThanOp<0, 1>]>>;
def X86froundRnd: SDNode<"X86ISD::VFPROUND",
SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisFP<1>,SDTCisFP<2>,
SDTCVecEltisVT<0, f32>,
SDTCVecEltisVT<1, f64>,
SDTCVecEltisVT<2, f64>,
SDTCisOpSmallerThanOp<0, 1>,
SDTCisInt<3>]>>;
def X86fpext : SDNode<"X86ISD::VFPEXT",
SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>,SDTCisFP<2>,
SDTCVecEltisVT<0, f64>,
SDTCVecEltisVT<1, f32>,
SDTCVecEltisVT<2, f32>,
SDTCisOpSmallerThanOp<1, 0>]>>;
def X86fpextRnd : SDNode<"X86ISD::VFPEXT",
SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisFP<1>,SDTCisFP<2>,
SDTCVecEltisVT<0, f64>,
SDTCVecEltisVT<1, f32>,
SDTCVecEltisVT<2, f32>,
SDTCisOpSmallerThanOp<1, 0>,
SDTCisInt<3>]>>;
def X86vshldq : SDNode<"X86ISD::VSHLDQ", SDTIntShiftOp>;
def X86vshrdq : SDNode<"X86ISD::VSRLDQ", SDTIntShiftOp>;
def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>;

@@ -390,9 +419,12 @@ def SDTFloatToInt: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
def SDTDoubleToIntRnd: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                            SDTCisInt<0>, SDTCVecEltisVT<1, f64>]>;
def SDTSDoubleToIntRnd: SDTypeProfile<1, 2, [SDTCisInt<0>,SDTCisFP<1>,
SDTCVecEltisVT<1, f64>, SDTCisInt<2>]>;
def SDTFloatToIntRnd: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                           SDTCisInt<0>, SDTCVecEltisVT<1, f32>]>;
def SDTSFloatToIntRnd: SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisFP<1>,
SDTCVecEltisVT<1, f32>, SDTCisInt<2>]>;
def SDTVintToFPRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                           SDTCisFP<0>, SDTCVecEltisVT<1, i32>,
                                           SDTCisInt<2>]>;

@@ -411,6 +443,10 @@ def SDTVFPToLongRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
def X86SintToFpRnd : SDNode<"X86ISD::SINT_TO_FP_RND", SDTintToFPRound>;
def X86UintToFpRnd : SDNode<"X86ISD::UINT_TO_FP_RND", SDTintToFPRound>;
def X86cvttss2IntRnd : SDNode<"X86ISD::FP_TO_SINT_RND", SDTSFloatToIntRnd>;
def X86cvttss2UIntRnd : SDNode<"X86ISD::FP_TO_UINT_RND", SDTSFloatToIntRnd>;
def X86cvttsd2IntRnd : SDNode<"X86ISD::FP_TO_SINT_RND", SDTSDoubleToIntRnd>;
def X86cvttsd2UIntRnd : SDNode<"X86ISD::FP_TO_UINT_RND", SDTSDoubleToIntRnd>;
// Vector with rounding mode
// cvtt fp-to-int staff


@@ -318,6 +318,14 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
  X86_INTRINSIC_DATA(avx512_cvtsi2sd64, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
  X86_INTRINSIC_DATA(avx512_cvtsi2ss32, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
  X86_INTRINSIC_DATA(avx512_cvtsi2ss64, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_cvttsd2si, INTR_TYPE_2OP, X86ISD::FP_TO_SINT_RND, 0),
X86_INTRINSIC_DATA(avx512_cvttsd2si64, INTR_TYPE_2OP, X86ISD::FP_TO_SINT_RND, 0),
X86_INTRINSIC_DATA(avx512_cvttsd2usi, INTR_TYPE_2OP, X86ISD::FP_TO_UINT_RND, 0),
X86_INTRINSIC_DATA(avx512_cvttsd2usi64, INTR_TYPE_2OP, X86ISD::FP_TO_UINT_RND, 0),
X86_INTRINSIC_DATA(avx512_cvttss2si, INTR_TYPE_2OP, X86ISD::FP_TO_SINT_RND, 0),
X86_INTRINSIC_DATA(avx512_cvttss2si64, INTR_TYPE_2OP, X86ISD::FP_TO_SINT_RND, 0),
X86_INTRINSIC_DATA(avx512_cvttss2usi, INTR_TYPE_2OP, X86ISD::FP_TO_UINT_RND, 0),
X86_INTRINSIC_DATA(avx512_cvttss2usi64, INTR_TYPE_2OP, X86ISD::FP_TO_UINT_RND, 0),
  X86_INTRINSIC_DATA(avx512_cvtusi2ss, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0),
  X86_INTRINSIC_DATA(avx512_cvtusi642sd, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0),
  X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0),

@@ -551,6 +559,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
                     ISD::SINT_TO_FP, 0),
  X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_512, INTR_TYPE_1OP_MASK,
                     ISD::SINT_TO_FP, ISD::SINT_TO_FP),
X86_INTRINSIC_DATA(avx512_mask_cvtsd2ss_round, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::VFPROUND, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtss2sd_round, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::VFPEXT, 0),
  X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_128, INTR_TYPE_1OP_MASK,
                     ISD::FP_TO_SINT, 0),
  X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_256, INTR_TYPE_1OP_MASK,


@@ -216,12 +216,47 @@ define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) {
}
declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone

-define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) {
+define i64 @test_x86_avx512_cvttsd2si64(<2 x double> %a0) {
  ; CHECK: vcvttsd2si {{.*}}encoding: [0x62
+  ; CHECK: vcvttsd2si {sae}{{.*}}encoding: [0x62
-  %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
-  ret i64 %res
+  %res0 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 4) ;
+  %res1 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 8) ;
+  %res2 = add i64 %res0, %res1
+  ret i64 %res2
}
-declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
+declare i64 @llvm.x86.avx512.cvttsd2si64(<2 x double>, i32) nounwind readnone
define i32 @test_x86_avx512_cvttsd2usi(<2 x double> %a0) {
; CHECK: vcvttsd2usi {{.*}}encoding: [0x62
; CHECK: vcvttsd2usi {sae}{{.*}}encoding: [0x62
%res0 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 4) ;
%res1 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 8) ;
%res2 = add i32 %res0, %res1
ret i32 %res2
}
declare i32 @llvm.x86.avx512.cvttsd2usi(<2 x double>, i32) nounwind readnone
define i32 @test_x86_avx512_cvttsd2si(<2 x double> %a0) {
; CHECK: vcvttsd2si {{.*}}encoding: [0x62
; CHECK: vcvttsd2si {sae}{{.*}}encoding: [0x62
%res0 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 4) ;
%res1 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 8) ;
%res2 = add i32 %res0, %res1
ret i32 %res2
}
declare i32 @llvm.x86.avx512.cvttsd2si(<2 x double>, i32) nounwind readnone
define i64 @test_x86_avx512_cvttsd2usi64(<2 x double> %a0) {
; CHECK: vcvttsd2usi {{.*}}encoding: [0x62
; CHECK: vcvttsd2usi {sae}{{.*}}encoding: [0x62
%res0 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 4) ;
%res1 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 8) ;
%res2 = add i64 %res0, %res1
ret i64 %res2
}
declare i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double>, i32) nounwind readnone
define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) {
  ; CHECK: vcvtss2si {{.*}}encoding: [0x62

@@ -239,12 +274,45 @@ define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) {
declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone

-define i64 @test_x86_sse_cvttss2si64(<4 x float> %a0) {
+define i32 @test_x86_avx512_cvttss2si(<4 x float> %a0) {
+  ; CHECK: vcvttss2si {sae}{{.*}}encoding: [0x62
  ; CHECK: vcvttss2si {{.*}}encoding: [0x62
-  %res = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) ; <i64> [#uses=1]
-  ret i64 %res
+  %res0 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 8) ;
+  %res1 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 4) ;
+  %res2 = add i32 %res0, %res1
+  ret i32 %res2
}
-declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
+declare i32 @llvm.x86.avx512.cvttss2si(<4 x float>, i32) nounwind readnone
define i64 @test_x86_avx512_cvttss2si64(<4 x float> %a0) {
; CHECK: vcvttss2si {{.*}}encoding: [0x62
; CHECK: vcvttss2si {sae}{{.*}}encoding: [0x62
%res0 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 4) ;
%res1 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 8) ;
%res2 = add i64 %res0, %res1
ret i64 %res2
}
declare i64 @llvm.x86.avx512.cvttss2si64(<4 x float>, i32) nounwind readnone
define i32 @test_x86_avx512_cvttss2usi(<4 x float> %a0) {
; CHECK: vcvttss2usi {sae}{{.*}}encoding: [0x62
; CHECK: vcvttss2usi {{.*}}encoding: [0x62
%res0 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 8) ;
%res1 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 4) ;
%res2 = add i32 %res0, %res1
ret i32 %res2
}
declare i32 @llvm.x86.avx512.cvttss2usi(<4 x float>, i32) nounwind readnone
define i64 @test_x86_avx512_cvttss2usi64(<4 x float> %a0) {
; CHECK: vcvttss2usi {{.*}}encoding: [0x62
; CHECK: vcvttss2usi {sae}{{.*}}encoding: [0x62
%res0 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 4) ;
%res1 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 8) ;
%res2 = add i64 %res0, %res1
ret i64 %res2
}
declare i64 @llvm.x86.avx512.cvttss2usi64(<4 x float>, i32) nounwind readnone
define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
  ; CHECK: vcvtsd2usi {{.*}}encoding: [0x62

@@ -4272,3 +4340,32 @@ define <8 x i64>@test_int_x86_avx512_mask_inserti64x4_512(<8 x i64> %x0, <4 x i6
  ret <8 x i64> %res4
}
declare <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float>, <4 x float>, <2 x double>, i8, i32)
define <2 x double>@test_int_x86_avx512_mask_cvt_ss2sd_round(<4 x float> %x0,<4 x float> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ss2sd_round:
; CHECK: kmovw %edi, %k1
; CHECK-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vcvtss2sd {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float> %x0, <4 x float> %x1, <2 x double> %x2, i8 %x3, i32 4)
%res1 = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float> %x0, <4 x float> %x1, <2 x double> %x2, i8 -1, i32 8)
%res2 = fadd <2 x double> %res, %res1
ret <2 x double> %res2
}
declare <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double>, <2 x double>, <4 x float>, i8, i32)
define <4 x float>@test_int_x86_avx512_mask_cvt_sd2ss_round(<2 x double> %x0,<2 x double> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_sd2ss_round:
; CHECK: kmovw %edi, %k1
; CHECK-NEXT: vcvtsd2ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vcvtsd2ss {rn-sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double> %x0, <2 x double> %x1, <4 x float> %x2, i8 %x3, i32 3)
%res1 = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double> %x0, <2 x double> %x1, <4 x float> %x2, i8 -1, i32 8)
%res2 = fadd <4 x float> %res, %res1
ret <4 x float> %res2
}

File diff suppressed because it is too large


@@ -2287,6 +2287,78 @@
// CHECK: encoding: [0x62,0xe1,0xfc,0x58,0x5b,0xa2,0xf8,0xfb,0xff,0xff]
vcvtqq2ps -1032(%rdx){1to8}, %ymm20
// CHECK: vcvtqq2ps %zmm19, %ymm28
// CHECK: encoding: [0x62,0x21,0xfc,0x48,0x5b,0xe3]
vcvtqq2ps %zmm19, %ymm28
// CHECK: vcvtqq2ps %zmm19, %ymm28 {%k3}
// CHECK: encoding: [0x62,0x21,0xfc,0x4b,0x5b,0xe3]
vcvtqq2ps %zmm19, %ymm28 {%k3}
// CHECK: vcvtqq2ps %zmm19, %ymm28 {%k3} {z}
// CHECK: encoding: [0x62,0x21,0xfc,0xcb,0x5b,0xe3]
vcvtqq2ps %zmm19, %ymm28 {%k3} {z}
// CHECK: vcvtqq2ps {rn-sae}, %zmm19, %ymm28
// CHECK: encoding: [0x62,0x21,0xfc,0x18,0x5b,0xe3]
vcvtqq2ps {rn-sae}, %zmm19, %ymm28
// CHECK: vcvtqq2ps {ru-sae}, %zmm19, %ymm28
// CHECK: encoding: [0x62,0x21,0xfc,0x58,0x5b,0xe3]
vcvtqq2ps {ru-sae}, %zmm19, %ymm28
// CHECK: vcvtqq2ps {rd-sae}, %zmm19, %ymm28
// CHECK: encoding: [0x62,0x21,0xfc,0x38,0x5b,0xe3]
vcvtqq2ps {rd-sae}, %zmm19, %ymm28
// CHECK: vcvtqq2ps {rz-sae}, %zmm19, %ymm28
// CHECK: encoding: [0x62,0x21,0xfc,0x78,0x5b,0xe3]
vcvtqq2ps {rz-sae}, %zmm19, %ymm28
// CHECK: vcvtqq2ps (%rcx), %ymm28
// CHECK: encoding: [0x62,0x61,0xfc,0x48,0x5b,0x21]
vcvtqq2ps (%rcx), %ymm28
// CHECK: vcvtqq2ps 4660(%rax,%r14,8), %ymm28
// CHECK: encoding: [0x62,0x21,0xfc,0x48,0x5b,0xa4,0xf0,0x34,0x12,0x00,0x00]
vcvtqq2ps 4660(%rax,%r14,8), %ymm28
// CHECK: vcvtqq2ps (%rcx){1to8}, %ymm28
// CHECK: encoding: [0x62,0x61,0xfc,0x58,0x5b,0x21]
vcvtqq2ps (%rcx){1to8}, %ymm28
// CHECK: vcvtqq2ps 8128(%rdx), %ymm28
// CHECK: encoding: [0x62,0x61,0xfc,0x48,0x5b,0x62,0x7f]
vcvtqq2ps 8128(%rdx), %ymm28
// CHECK: vcvtqq2ps 8192(%rdx), %ymm28
// CHECK: encoding: [0x62,0x61,0xfc,0x48,0x5b,0xa2,0x00,0x20,0x00,0x00]
vcvtqq2ps 8192(%rdx), %ymm28
// CHECK: vcvtqq2ps -8192(%rdx), %ymm28
// CHECK: encoding: [0x62,0x61,0xfc,0x48,0x5b,0x62,0x80]
vcvtqq2ps -8192(%rdx), %ymm28
// CHECK: vcvtqq2ps -8256(%rdx), %ymm28
// CHECK: encoding: [0x62,0x61,0xfc,0x48,0x5b,0xa2,0xc0,0xdf,0xff,0xff]
vcvtqq2ps -8256(%rdx), %ymm28
// CHECK: vcvtqq2ps 1016(%rdx){1to8}, %ymm28
// CHECK: encoding: [0x62,0x61,0xfc,0x58,0x5b,0x62,0x7f]
vcvtqq2ps 1016(%rdx){1to8}, %ymm28
// CHECK: vcvtqq2ps 1024(%rdx){1to8}, %ymm28
// CHECK: encoding: [0x62,0x61,0xfc,0x58,0x5b,0xa2,0x00,0x04,0x00,0x00]
vcvtqq2ps 1024(%rdx){1to8}, %ymm28
// CHECK: vcvtqq2ps -1024(%rdx){1to8}, %ymm28
// CHECK: encoding: [0x62,0x61,0xfc,0x58,0x5b,0x62,0x80]
vcvtqq2ps -1024(%rdx){1to8}, %ymm28
// CHECK: vcvtqq2ps -1032(%rdx){1to8}, %ymm28
// CHECK: encoding: [0x62,0x61,0xfc,0x58,0x5b,0xa2,0xf8,0xfb,0xff,0xff]
vcvtqq2ps -1032(%rdx){1to8}, %ymm28
// CHECK: vcvtuqq2pd %zmm29, %zmm21
// CHECK: encoding: [0x62,0x81,0xfe,0x48,0x7a,0xed]
vcvtuqq2pd %zmm29, %zmm21

@@ -3340,3 +3412,134 @@
vfpclasspsl $0x7b,-516(%rdx){1to16}, %k4
// CHECK: vcvtuqq2ps {rn-sae}, %zmm21, %ymm18
// CHECK: encoding: [0x62,0xa1,0xff,0x18,0x7a,0xd5]
vcvtuqq2ps {rn-sae}, %zmm21, %ymm18
// CHECK: vcvtuqq2ps {ru-sae}, %zmm21, %ymm18
// CHECK: encoding: [0x62,0xa1,0xff,0x58,0x7a,0xd5]
vcvtuqq2ps {ru-sae}, %zmm21, %ymm18
// CHECK: vcvtuqq2ps {rd-sae}, %zmm21, %ymm18
// CHECK: encoding: [0x62,0xa1,0xff,0x38,0x7a,0xd5]
vcvtuqq2ps {rd-sae}, %zmm21, %ymm18
// CHECK: vcvtuqq2ps {rz-sae}, %zmm21, %ymm18
// CHECK: encoding: [0x62,0xa1,0xff,0x78,0x7a,0xd5]
vcvtuqq2ps {rz-sae}, %zmm21, %ymm18
// CHECK: vcvtuqq2ps (%rcx), %ymm18
// CHECK: encoding: [0x62,0xe1,0xff,0x48,0x7a,0x11]
vcvtuqq2ps (%rcx), %ymm18
// CHECK: vcvtuqq2ps 291(%rax,%r14,8), %ymm18
// CHECK: encoding: [0x62,0xa1,0xff,0x48,0x7a,0x94,0xf0,0x23,0x01,0x00,0x00]
vcvtuqq2ps 291(%rax,%r14,8), %ymm18
// CHECK: vcvtuqq2ps (%rcx){1to8}, %ymm18
// CHECK: encoding: [0x62,0xe1,0xff,0x58,0x7a,0x11]
vcvtuqq2ps (%rcx){1to8}, %ymm18
// CHECK: vcvtuqq2ps 8128(%rdx), %ymm18
// CHECK: encoding: [0x62,0xe1,0xff,0x48,0x7a,0x52,0x7f]
vcvtuqq2ps 8128(%rdx), %ymm18
// CHECK: vcvtuqq2ps 8192(%rdx), %ymm18
// CHECK: encoding: [0x62,0xe1,0xff,0x48,0x7a,0x92,0x00,0x20,0x00,0x00]
vcvtuqq2ps 8192(%rdx), %ymm18
// CHECK: vcvtuqq2ps -8192(%rdx), %ymm18
// CHECK: encoding: [0x62,0xe1,0xff,0x48,0x7a,0x52,0x80]
vcvtuqq2ps -8192(%rdx), %ymm18
// CHECK: vcvtuqq2ps -8256(%rdx), %ymm18
// CHECK: encoding: [0x62,0xe1,0xff,0x48,0x7a,0x92,0xc0,0xdf,0xff,0xff]
vcvtuqq2ps -8256(%rdx), %ymm18
// CHECK: vcvtuqq2ps 1016(%rdx){1to8}, %ymm18
// CHECK: encoding: [0x62,0xe1,0xff,0x58,0x7a,0x52,0x7f]
vcvtuqq2ps 1016(%rdx){1to8}, %ymm18
// CHECK: vcvtuqq2ps 1024(%rdx){1to8}, %ymm18
// CHECK: encoding: [0x62,0xe1,0xff,0x58,0x7a,0x92,0x00,0x04,0x00,0x00]
vcvtuqq2ps 1024(%rdx){1to8}, %ymm18
// CHECK: vcvtuqq2ps -1024(%rdx){1to8}, %ymm18
// CHECK: encoding: [0x62,0xe1,0xff,0x58,0x7a,0x52,0x80]
vcvtuqq2ps -1024(%rdx){1to8}, %ymm18
// CHECK: vcvtuqq2ps -1032(%rdx){1to8}, %ymm18
// CHECK: encoding: [0x62,0xe1,0xff,0x58,0x7a,0x92,0xf8,0xfb,0xff,0xff]
vcvtuqq2ps -1032(%rdx){1to8}, %ymm18
// CHECK: vcvtuqq2ps %zmm26, %ymm25
// CHECK: encoding: [0x62,0x01,0xff,0x48,0x7a,0xca]
vcvtuqq2ps %zmm26, %ymm25
// CHECK: vcvtuqq2ps %zmm26, %ymm25 {%k2}
// CHECK: encoding: [0x62,0x01,0xff,0x4a,0x7a,0xca]
vcvtuqq2ps %zmm26, %ymm25 {%k2}
// CHECK: vcvtuqq2ps %zmm26, %ymm25 {%k2} {z}
// CHECK: encoding: [0x62,0x01,0xff,0xca,0x7a,0xca]
vcvtuqq2ps %zmm26, %ymm25 {%k2} {z}
// CHECK: vcvtuqq2ps {rn-sae}, %zmm26, %ymm25
// CHECK: encoding: [0x62,0x01,0xff,0x18,0x7a,0xca]
vcvtuqq2ps {rn-sae}, %zmm26, %ymm25
// CHECK: vcvtuqq2ps {ru-sae}, %zmm26, %ymm25
// CHECK: encoding: [0x62,0x01,0xff,0x58,0x7a,0xca]
vcvtuqq2ps {ru-sae}, %zmm26, %ymm25
// CHECK: vcvtuqq2ps {rd-sae}, %zmm26, %ymm25
// CHECK: encoding: [0x62,0x01,0xff,0x38,0x7a,0xca]
vcvtuqq2ps {rd-sae}, %zmm26, %ymm25
// CHECK: vcvtuqq2ps {rz-sae}, %zmm26, %ymm25
// CHECK: encoding: [0x62,0x01,0xff,0x78,0x7a,0xca]
vcvtuqq2ps {rz-sae}, %zmm26, %ymm25
// CHECK: vcvtuqq2ps (%rcx), %ymm25
// CHECK: encoding: [0x62,0x61,0xff,0x48,0x7a,0x09]
vcvtuqq2ps (%rcx), %ymm25
// CHECK: vcvtuqq2ps 4660(%rax,%r14,8), %ymm25
// CHECK: encoding: [0x62,0x21,0xff,0x48,0x7a,0x8c,0xf0,0x34,0x12,0x00,0x00]
vcvtuqq2ps 4660(%rax,%r14,8), %ymm25
// CHECK: vcvtuqq2ps (%rcx){1to8}, %ymm25
// CHECK: encoding: [0x62,0x61,0xff,0x58,0x7a,0x09]
vcvtuqq2ps (%rcx){1to8}, %ymm25
// CHECK: vcvtuqq2ps 8128(%rdx), %ymm25
// CHECK: encoding: [0x62,0x61,0xff,0x48,0x7a,0x4a,0x7f]
vcvtuqq2ps 8128(%rdx), %ymm25
// CHECK: vcvtuqq2ps 8192(%rdx), %ymm25
// CHECK: encoding: [0x62,0x61,0xff,0x48,0x7a,0x8a,0x00,0x20,0x00,0x00]
vcvtuqq2ps 8192(%rdx), %ymm25
// CHECK: vcvtuqq2ps -8192(%rdx), %ymm25
// CHECK: encoding: [0x62,0x61,0xff,0x48,0x7a,0x4a,0x80]
vcvtuqq2ps -8192(%rdx), %ymm25
// CHECK: vcvtuqq2ps -8256(%rdx), %ymm25
// CHECK: encoding: [0x62,0x61,0xff,0x48,0x7a,0x8a,0xc0,0xdf,0xff,0xff]
vcvtuqq2ps -8256(%rdx), %ymm25
// CHECK: vcvtuqq2ps 1016(%rdx){1to8}, %ymm25
// CHECK: encoding: [0x62,0x61,0xff,0x58,0x7a,0x4a,0x7f]
vcvtuqq2ps 1016(%rdx){1to8}, %ymm25
// CHECK: vcvtuqq2ps 1024(%rdx){1to8}, %ymm25
// CHECK: encoding: [0x62,0x61,0xff,0x58,0x7a,0x8a,0x00,0x04,0x00,0x00]
vcvtuqq2ps 1024(%rdx){1to8}, %ymm25
// CHECK: vcvtuqq2ps -1024(%rdx){1to8}, %ymm25
// CHECK: encoding: [0x62,0x61,0xff,0x58,0x7a,0x4a,0x80]
vcvtuqq2ps -1024(%rdx){1to8}, %ymm25
// CHECK: vcvtuqq2ps -1032(%rdx){1to8}, %ymm25
// CHECK: encoding: [0x62,0x61,0xff,0x58,0x7a,0x8a,0xf8,0xfb,0xff,0xff]
vcvtuqq2ps -1032(%rdx){1to8}, %ymm25


@@ -3360,6 +3360,118 @@
// CHECK: encoding: [0x62,0x61,0xfc,0x38,0x5b,0x9a,0xf8,0xfb,0xff,0xff]
vcvtqq2ps -1032(%rdx){1to4}, %xmm27
// CHECK: vcvtqq2ps %xmm26, %xmm30
// CHECK: encoding: [0x62,0x01,0xfc,0x08,0x5b,0xf2]
vcvtqq2ps %xmm26, %xmm30
// CHECK: vcvtqq2ps %xmm26, %xmm30 {%k4}
// CHECK: encoding: [0x62,0x01,0xfc,0x0c,0x5b,0xf2]
vcvtqq2ps %xmm26, %xmm30 {%k4}
// CHECK: vcvtqq2ps %xmm26, %xmm30 {%k4} {z}
// CHECK: encoding: [0x62,0x01,0xfc,0x8c,0x5b,0xf2]
vcvtqq2ps %xmm26, %xmm30 {%k4} {z}
// CHECK: vcvtqq2psx (%rcx), %xmm30
// CHECK: encoding: [0x62,0x61,0xfc,0x08,0x5b,0x31]
vcvtqq2psx (%rcx), %xmm30
// CHECK: vcvtqq2psx 4660(%rax,%r14,8), %xmm30
// CHECK: encoding: [0x62,0x21,0xfc,0x08,0x5b,0xb4,0xf0,0x34,0x12,0x00,0x00]
vcvtqq2psx 4660(%rax,%r14,8), %xmm30
// CHECK: vcvtqq2ps (%rcx){1to2}, %xmm30
// CHECK: encoding: [0x62,0x61,0xfc,0x18,0x5b,0x31]
vcvtqq2ps (%rcx){1to2}, %xmm30
// CHECK: vcvtqq2psx 2032(%rdx), %xmm30
// CHECK: encoding: [0x62,0x61,0xfc,0x08,0x5b,0x72,0x7f]
vcvtqq2psx 2032(%rdx), %xmm30
// CHECK: vcvtqq2psx 2048(%rdx), %xmm30
// CHECK: encoding: [0x62,0x61,0xfc,0x08,0x5b,0xb2,0x00,0x08,0x00,0x00]
vcvtqq2psx 2048(%rdx), %xmm30
// CHECK: vcvtqq2psx -2048(%rdx), %xmm30
// CHECK: encoding: [0x62,0x61,0xfc,0x08,0x5b,0x72,0x80]
vcvtqq2psx -2048(%rdx), %xmm30
// CHECK: vcvtqq2psx -2064(%rdx), %xmm30
// CHECK: encoding: [0x62,0x61,0xfc,0x08,0x5b,0xb2,0xf0,0xf7,0xff,0xff]
vcvtqq2psx -2064(%rdx), %xmm30
// CHECK: vcvtqq2ps 1016(%rdx){1to2}, %xmm30
// CHECK: encoding: [0x62,0x61,0xfc,0x18,0x5b,0x72,0x7f]
vcvtqq2ps 1016(%rdx){1to2}, %xmm30
// CHECK: vcvtqq2ps 1024(%rdx){1to2}, %xmm30
// CHECK: encoding: [0x62,0x61,0xfc,0x18,0x5b,0xb2,0x00,0x04,0x00,0x00]
vcvtqq2ps 1024(%rdx){1to2}, %xmm30
// CHECK: vcvtqq2ps -1024(%rdx){1to2}, %xmm30
// CHECK: encoding: [0x62,0x61,0xfc,0x18,0x5b,0x72,0x80]
vcvtqq2ps -1024(%rdx){1to2}, %xmm30
// CHECK: vcvtqq2ps -1032(%rdx){1to2}, %xmm30
// CHECK: encoding: [0x62,0x61,0xfc,0x18,0x5b,0xb2,0xf8,0xfb,0xff,0xff]
vcvtqq2ps -1032(%rdx){1to2}, %xmm30
// CHECK: vcvtqq2ps %ymm28, %xmm20
// CHECK: encoding: [0x62,0x81,0xfc,0x28,0x5b,0xe4]
vcvtqq2ps %ymm28, %xmm20
// CHECK: vcvtqq2ps %ymm28, %xmm20 {%k3}
// CHECK: encoding: [0x62,0x81,0xfc,0x2b,0x5b,0xe4]
vcvtqq2ps %ymm28, %xmm20 {%k3}
// CHECK: vcvtqq2ps %ymm28, %xmm20 {%k3} {z}
// CHECK: encoding: [0x62,0x81,0xfc,0xab,0x5b,0xe4]
vcvtqq2ps %ymm28, %xmm20 {%k3} {z}
// CHECK: vcvtqq2psy (%rcx), %xmm20
// CHECK: encoding: [0x62,0xe1,0xfc,0x28,0x5b,0x21]
vcvtqq2psy (%rcx), %xmm20
// CHECK: vcvtqq2psy 4660(%rax,%r14,8), %xmm20
// CHECK: encoding: [0x62,0xa1,0xfc,0x28,0x5b,0xa4,0xf0,0x34,0x12,0x00,0x00]
vcvtqq2psy 4660(%rax,%r14,8), %xmm20
// CHECK: vcvtqq2ps (%rcx){1to4}, %xmm20
// CHECK: encoding: [0x62,0xe1,0xfc,0x38,0x5b,0x21]
vcvtqq2ps (%rcx){1to4}, %xmm20
// CHECK: vcvtqq2psy 4064(%rdx), %xmm20
// CHECK: encoding: [0x62,0xe1,0xfc,0x28,0x5b,0x62,0x7f]
vcvtqq2psy 4064(%rdx), %xmm20
// CHECK: vcvtqq2psy 4096(%rdx), %xmm20
// CHECK: encoding: [0x62,0xe1,0xfc,0x28,0x5b,0xa2,0x00,0x10,0x00,0x00]
vcvtqq2psy 4096(%rdx), %xmm20
// CHECK: vcvtqq2psy -4096(%rdx), %xmm20
// CHECK: encoding: [0x62,0xe1,0xfc,0x28,0x5b,0x62,0x80]
vcvtqq2psy -4096(%rdx), %xmm20
// CHECK: vcvtqq2psy -4128(%rdx), %xmm20
// CHECK: encoding: [0x62,0xe1,0xfc,0x28,0x5b,0xa2,0xe0,0xef,0xff,0xff]
vcvtqq2psy -4128(%rdx), %xmm20
// CHECK: vcvtqq2ps 1016(%rdx){1to4}, %xmm20
// CHECK: encoding: [0x62,0xe1,0xfc,0x38,0x5b,0x62,0x7f]
vcvtqq2ps 1016(%rdx){1to4}, %xmm20
// CHECK: vcvtqq2ps 1024(%rdx){1to4}, %xmm20
// CHECK: encoding: [0x62,0xe1,0xfc,0x38,0x5b,0xa2,0x00,0x04,0x00,0x00]
vcvtqq2ps 1024(%rdx){1to4}, %xmm20
// CHECK: vcvtqq2ps -1024(%rdx){1to4}, %xmm20
// CHECK: encoding: [0x62,0xe1,0xfc,0x38,0x5b,0x62,0x80]
vcvtqq2ps -1024(%rdx){1to4}, %xmm20
// CHECK: vcvtqq2ps -1032(%rdx){1to4}, %xmm20
// CHECK: encoding: [0x62,0xe1,0xfc,0x38,0x5b,0xa2,0xf8,0xfb,0xff,0xff]
vcvtqq2ps -1032(%rdx){1to4}, %xmm20
// CHECK: vcvtuqq2pd %xmm20, %xmm19
// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x7a,0xdc]
vcvtuqq2pd %xmm20, %xmm19

@@ -3584,6 +3696,118 @@
// CHECK: encoding: [0x62,0x61,0xff,0x38,0x7a,0xa2,0xf8,0xfb,0xff,0xff]
vcvtuqq2ps -1032(%rdx){1to4}, %xmm28
// CHECK: vcvtuqq2ps %xmm22, %xmm21
// CHECK: encoding: [0x62,0xa1,0xff,0x08,0x7a,0xee]
vcvtuqq2ps %xmm22, %xmm21
// CHECK: vcvtuqq2ps %xmm22, %xmm21 {%k6}
// CHECK: encoding: [0x62,0xa1,0xff,0x0e,0x7a,0xee]
vcvtuqq2ps %xmm22, %xmm21 {%k6}
// CHECK: vcvtuqq2ps %xmm22, %xmm21 {%k6} {z}
// CHECK: encoding: [0x62,0xa1,0xff,0x8e,0x7a,0xee]
vcvtuqq2ps %xmm22, %xmm21 {%k6} {z}
// CHECK: vcvtuqq2psx (%rcx), %xmm21
// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x7a,0x29]
vcvtuqq2psx (%rcx), %xmm21
// CHECK: vcvtuqq2psx 4660(%rax,%r14,8), %xmm21
// CHECK: encoding: [0x62,0xa1,0xff,0x08,0x7a,0xac,0xf0,0x34,0x12,0x00,0x00]
vcvtuqq2psx 4660(%rax,%r14,8), %xmm21
// CHECK: vcvtuqq2ps (%rcx){1to2}, %xmm21
// CHECK: encoding: [0x62,0xe1,0xff,0x18,0x7a,0x29]
vcvtuqq2ps (%rcx){1to2}, %xmm21
// CHECK: vcvtuqq2psx 2032(%rdx), %xmm21
// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x7a,0x6a,0x7f]
vcvtuqq2psx 2032(%rdx), %xmm21
// CHECK: vcvtuqq2psx 2048(%rdx), %xmm21
// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x7a,0xaa,0x00,0x08,0x00,0x00]
vcvtuqq2psx 2048(%rdx), %xmm21
// CHECK: vcvtuqq2psx -2048(%rdx), %xmm21
// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x7a,0x6a,0x80]
vcvtuqq2psx -2048(%rdx), %xmm21
// CHECK: vcvtuqq2psx -2064(%rdx), %xmm21
// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x7a,0xaa,0xf0,0xf7,0xff,0xff]
vcvtuqq2psx -2064(%rdx), %xmm21
// CHECK: vcvtuqq2ps 1016(%rdx){1to2}, %xmm21
// CHECK: encoding: [0x62,0xe1,0xff,0x18,0x7a,0x6a,0x7f]
vcvtuqq2ps 1016(%rdx){1to2}, %xmm21
// CHECK: vcvtuqq2ps 1024(%rdx){1to2}, %xmm21
// CHECK: encoding: [0x62,0xe1,0xff,0x18,0x7a,0xaa,0x00,0x04,0x00,0x00]
vcvtuqq2ps 1024(%rdx){1to2}, %xmm21
// CHECK: vcvtuqq2ps -1024(%rdx){1to2}, %xmm21
// CHECK: encoding: [0x62,0xe1,0xff,0x18,0x7a,0x6a,0x80]
vcvtuqq2ps -1024(%rdx){1to2}, %xmm21
// CHECK: vcvtuqq2ps -1032(%rdx){1to2}, %xmm21
// CHECK: encoding: [0x62,0xe1,0xff,0x18,0x7a,0xaa,0xf8,0xfb,0xff,0xff]
vcvtuqq2ps -1032(%rdx){1to2}, %xmm21
// CHECK: vcvtuqq2ps %ymm17, %xmm26
// CHECK: encoding: [0x62,0x21,0xff,0x28,0x7a,0xd1]
vcvtuqq2ps %ymm17, %xmm26
// CHECK: vcvtuqq2ps %ymm17, %xmm26 {%k4}
// CHECK: encoding: [0x62,0x21,0xff,0x2c,0x7a,0xd1]
vcvtuqq2ps %ymm17, %xmm26 {%k4}
// CHECK: vcvtuqq2ps %ymm17, %xmm26 {%k4} {z}
// CHECK: encoding: [0x62,0x21,0xff,0xac,0x7a,0xd1]
vcvtuqq2ps %ymm17, %xmm26 {%k4} {z}
// CHECK: vcvtuqq2psy (%rcx), %xmm26
// CHECK: encoding: [0x62,0x61,0xff,0x28,0x7a,0x11]
vcvtuqq2psy (%rcx), %xmm26
// CHECK: vcvtuqq2psy 4660(%rax,%r14,8), %xmm26
// CHECK: encoding: [0x62,0x21,0xff,0x28,0x7a,0x94,0xf0,0x34,0x12,0x00,0x00]
vcvtuqq2psy 4660(%rax,%r14,8), %xmm26
// CHECK: vcvtuqq2ps (%rcx){1to4}, %xmm26
// CHECK: encoding: [0x62,0x61,0xff,0x38,0x7a,0x11]
vcvtuqq2ps (%rcx){1to4}, %xmm26
// CHECK: vcvtuqq2psy 4064(%rdx), %xmm26
// CHECK: encoding: [0x62,0x61,0xff,0x28,0x7a,0x52,0x7f]
vcvtuqq2psy 4064(%rdx), %xmm26
// CHECK: vcvtuqq2psy 4096(%rdx), %xmm26
// CHECK: encoding: [0x62,0x61,0xff,0x28,0x7a,0x92,0x00,0x10,0x00,0x00]
vcvtuqq2psy 4096(%rdx), %xmm26
// CHECK: vcvtuqq2psy -4096(%rdx), %xmm26
// CHECK: encoding: [0x62,0x61,0xff,0x28,0x7a,0x52,0x80]
vcvtuqq2psy -4096(%rdx), %xmm26
// CHECK: vcvtuqq2psy -4128(%rdx), %xmm26
// CHECK: encoding: [0x62,0x61,0xff,0x28,0x7a,0x92,0xe0,0xef,0xff,0xff]
vcvtuqq2psy -4128(%rdx), %xmm26
// CHECK: vcvtuqq2ps 1016(%rdx){1to4}, %xmm26
// CHECK: encoding: [0x62,0x61,0xff,0x38,0x7a,0x52,0x7f]
vcvtuqq2ps 1016(%rdx){1to4}, %xmm26
// CHECK: vcvtuqq2ps 1024(%rdx){1to4}, %xmm26
// CHECK: encoding: [0x62,0x61,0xff,0x38,0x7a,0x92,0x00,0x04,0x00,0x00]
vcvtuqq2ps 1024(%rdx){1to4}, %xmm26
// CHECK: vcvtuqq2ps -1024(%rdx){1to4}, %xmm26
// CHECK: encoding: [0x62,0x61,0xff,0x38,0x7a,0x52,0x80]
vcvtuqq2ps -1024(%rdx){1to4}, %xmm26
// CHECK: vcvtuqq2ps -1032(%rdx){1to4}, %xmm26
// CHECK: encoding: [0x62,0x61,0xff,0x38,0x7a,0x92,0xf8,0xfb,0xff,0xff]
vcvtuqq2ps -1032(%rdx){1to4}, %xmm26
// CHECK: vinsertf64x2 $171, %xmm23, %ymm27, %ymm21
// CHECK: encoding: [0x62,0xa3,0xa5,0x20,0x18,0xef,0xab]
vinsertf64x2 $0xab, %xmm23, %ymm27, %ymm21


@@ -20523,3 +20523,338 @@ vaddpd {rz-sae}, %zmm2, %zmm1, %zmm1
// CHECK: encoding: [0x62,0x22,0xfd,0x21,0xa2,0xb4,0xb9,0x00,0x04,0x00,0x00]
vscatterdpd %ymm30, 1024(%rcx, %xmm31,4) {%k1}
// CHECK: vcvtpd2dq %xmm20, %xmm25
// CHECK: encoding: [0x62,0x21,0xff,0x08,0xe6,0xcc]
vcvtpd2dq %xmm20, %xmm25
// CHECK: vcvtpd2dq %xmm20, %xmm25 {%k2}
// CHECK: encoding: [0x62,0x21,0xff,0x0a,0xe6,0xcc]
vcvtpd2dq %xmm20, %xmm25 {%k2}
// CHECK: vcvtpd2dq %xmm20, %xmm25 {%k2} {z}
// CHECK: encoding: [0x62,0x21,0xff,0x8a,0xe6,0xcc]
vcvtpd2dq %xmm20, %xmm25 {%k2} {z}
// CHECK: vcvtpd2dqx (%rcx), %xmm25
// CHECK: encoding: [0x62,0x61,0xff,0x08,0xe6,0x09]
vcvtpd2dqx (%rcx), %xmm25
// CHECK: vcvtpd2dqx 291(%rax,%r14,8), %xmm25
// CHECK: encoding: [0x62,0x21,0xff,0x08,0xe6,0x8c,0xf0,0x23,0x01,0x00,0x00]
vcvtpd2dqx 291(%rax,%r14,8), %xmm25
// CHECK: vcvtpd2dq (%rcx){1to2}, %xmm25
// CHECK: encoding: [0x62,0x61,0xff,0x18,0xe6,0x09]
vcvtpd2dq (%rcx){1to2}, %xmm25
// CHECK: vcvtpd2dqx 2032(%rdx), %xmm25
// CHECK: encoding: [0x62,0x61,0xff,0x08,0xe6,0x4a,0x7f]
vcvtpd2dqx 2032(%rdx), %xmm25
// CHECK: vcvtpd2dqx 2048(%rdx), %xmm25
// CHECK: encoding: [0x62,0x61,0xff,0x08,0xe6,0x8a,0x00,0x08,0x00,0x00]
vcvtpd2dqx 2048(%rdx), %xmm25
// CHECK: vcvtpd2dqx -2048(%rdx), %xmm25
// CHECK: encoding: [0x62,0x61,0xff,0x08,0xe6,0x4a,0x80]
vcvtpd2dqx -2048(%rdx), %xmm25
// CHECK: vcvtpd2dqx -2064(%rdx), %xmm25
// CHECK: encoding: [0x62,0x61,0xff,0x08,0xe6,0x8a,0xf0,0xf7,0xff,0xff]
vcvtpd2dqx -2064(%rdx), %xmm25
// CHECK: vcvtpd2dq 1016(%rdx){1to2}, %xmm25
// CHECK: encoding: [0x62,0x61,0xff,0x18,0xe6,0x4a,0x7f]
vcvtpd2dq 1016(%rdx){1to2}, %xmm25
// CHECK: vcvtpd2dq 1024(%rdx){1to2}, %xmm25
// CHECK: encoding: [0x62,0x61,0xff,0x18,0xe6,0x8a,0x00,0x04,0x00,0x00]
vcvtpd2dq 1024(%rdx){1to2}, %xmm25
// CHECK: vcvtpd2dq -1024(%rdx){1to2}, %xmm25
// CHECK: encoding: [0x62,0x61,0xff,0x18,0xe6,0x4a,0x80]
vcvtpd2dq -1024(%rdx){1to2}, %xmm25
// CHECK: vcvtpd2dq -1032(%rdx){1to2}, %xmm25
// CHECK: encoding: [0x62,0x61,0xff,0x18,0xe6,0x8a,0xf8,0xfb,0xff,0xff]
vcvtpd2dq -1032(%rdx){1to2}, %xmm25
// CHECK: vcvtpd2dq %ymm28, %xmm30
// CHECK: encoding: [0x62,0x01,0xff,0x28,0xe6,0xf4]
vcvtpd2dq %ymm28, %xmm30
// CHECK: vcvtpd2dq %ymm28, %xmm30 {%k3}
// CHECK: encoding: [0x62,0x01,0xff,0x2b,0xe6,0xf4]
vcvtpd2dq %ymm28, %xmm30 {%k3}
// CHECK: vcvtpd2dq %ymm28, %xmm30 {%k3} {z}
// CHECK: encoding: [0x62,0x01,0xff,0xab,0xe6,0xf4]
vcvtpd2dq %ymm28, %xmm30 {%k3} {z}
// CHECK: vcvtpd2dqy (%rcx), %xmm30
// CHECK: encoding: [0x62,0x61,0xff,0x28,0xe6,0x31]
vcvtpd2dqy (%rcx), %xmm30
// CHECK: vcvtpd2dqy 291(%rax,%r14,8), %xmm30
// CHECK: encoding: [0x62,0x21,0xff,0x28,0xe6,0xb4,0xf0,0x23,0x01,0x00,0x00]
vcvtpd2dqy 291(%rax,%r14,8), %xmm30
// CHECK: vcvtpd2dq (%rcx){1to4}, %xmm30
// CHECK: encoding: [0x62,0x61,0xff,0x38,0xe6,0x31]
vcvtpd2dq (%rcx){1to4}, %xmm30
// CHECK: vcvtpd2dqy 4064(%rdx), %xmm30
// CHECK: encoding: [0x62,0x61,0xff,0x28,0xe6,0x72,0x7f]
vcvtpd2dqy 4064(%rdx), %xmm30
// CHECK: vcvtpd2dqy 4096(%rdx), %xmm30
// CHECK: encoding: [0x62,0x61,0xff,0x28,0xe6,0xb2,0x00,0x10,0x00,0x00]
vcvtpd2dqy 4096(%rdx), %xmm30
// CHECK: vcvtpd2dqy -4096(%rdx), %xmm30
// CHECK: encoding: [0x62,0x61,0xff,0x28,0xe6,0x72,0x80]
vcvtpd2dqy -4096(%rdx), %xmm30
// CHECK: vcvtpd2dqy -4128(%rdx), %xmm30
// CHECK: encoding: [0x62,0x61,0xff,0x28,0xe6,0xb2,0xe0,0xef,0xff,0xff]
vcvtpd2dqy -4128(%rdx), %xmm30
// CHECK: vcvtpd2dq 1016(%rdx){1to4}, %xmm30
// CHECK: encoding: [0x62,0x61,0xff,0x38,0xe6,0x72,0x7f]
vcvtpd2dq 1016(%rdx){1to4}, %xmm30
// CHECK: vcvtpd2dq 1024(%rdx){1to4}, %xmm30
// CHECK: encoding: [0x62,0x61,0xff,0x38,0xe6,0xb2,0x00,0x04,0x00,0x00]
vcvtpd2dq 1024(%rdx){1to4}, %xmm30
// CHECK: vcvtpd2dq -1024(%rdx){1to4}, %xmm30
// CHECK: encoding: [0x62,0x61,0xff,0x38,0xe6,0x72,0x80]
vcvtpd2dq -1024(%rdx){1to4}, %xmm30
// CHECK: vcvtpd2dq -1032(%rdx){1to4}, %xmm30
// CHECK: encoding: [0x62,0x61,0xff,0x38,0xe6,0xb2,0xf8,0xfb,0xff,0xff]
vcvtpd2dq -1032(%rdx){1to4}, %xmm30
// CHECK: vcvtpd2ps %xmm27, %xmm27
// CHECK: encoding: [0x62,0x01,0xfd,0x08,0x5a,0xdb]
vcvtpd2ps %xmm27, %xmm27
// CHECK: vcvtpd2ps %xmm27, %xmm27 {%k7}
// CHECK: encoding: [0x62,0x01,0xfd,0x0f,0x5a,0xdb]
vcvtpd2ps %xmm27, %xmm27 {%k7}
// CHECK: vcvtpd2ps %xmm27, %xmm27 {%k7} {z}
// CHECK: encoding: [0x62,0x01,0xfd,0x8f,0x5a,0xdb]
vcvtpd2ps %xmm27, %xmm27 {%k7} {z}
// CHECK: vcvtpd2psx (%rcx), %xmm27
// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x5a,0x19]
vcvtpd2psx (%rcx), %xmm27
// CHECK: vcvtpd2psx 291(%rax,%r14,8), %xmm27
// CHECK: encoding: [0x62,0x21,0xfd,0x08,0x5a,0x9c,0xf0,0x23,0x01,0x00,0x00]
vcvtpd2psx 291(%rax,%r14,8), %xmm27
// CHECK: vcvtpd2ps (%rcx){1to2}, %xmm27
// CHECK: encoding: [0x62,0x61,0xfd,0x18,0x5a,0x19]
vcvtpd2ps (%rcx){1to2}, %xmm27
// CHECK: vcvtpd2psx 2032(%rdx), %xmm27
// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x5a,0x5a,0x7f]
vcvtpd2psx 2032(%rdx), %xmm27
// CHECK: vcvtpd2psx 2048(%rdx), %xmm27
// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x5a,0x9a,0x00,0x08,0x00,0x00]
vcvtpd2psx 2048(%rdx), %xmm27
// CHECK: vcvtpd2psx -2048(%rdx), %xmm27
// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x5a,0x5a,0x80]
vcvtpd2psx -2048(%rdx), %xmm27
// CHECK: vcvtpd2psx -2064(%rdx), %xmm27
// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x5a,0x9a,0xf0,0xf7,0xff,0xff]
vcvtpd2psx -2064(%rdx), %xmm27
// CHECK: vcvtpd2ps 1016(%rdx){1to2}, %xmm27
// CHECK: encoding: [0x62,0x61,0xfd,0x18,0x5a,0x5a,0x7f]
vcvtpd2ps 1016(%rdx){1to2}, %xmm27
// CHECK: vcvtpd2ps 1024(%rdx){1to2}, %xmm27
// CHECK: encoding: [0x62,0x61,0xfd,0x18,0x5a,0x9a,0x00,0x04,0x00,0x00]
vcvtpd2ps 1024(%rdx){1to2}, %xmm27
// CHECK: vcvtpd2ps -1024(%rdx){1to2}, %xmm27
// CHECK: encoding: [0x62,0x61,0xfd,0x18,0x5a,0x5a,0x80]
vcvtpd2ps -1024(%rdx){1to2}, %xmm27
// CHECK: vcvtpd2ps -1032(%rdx){1to2}, %xmm27
// CHECK: encoding: [0x62,0x61,0xfd,0x18,0x5a,0x9a,0xf8,0xfb,0xff,0xff]
vcvtpd2ps -1032(%rdx){1to2}, %xmm27
// CHECK: vcvtpd2ps %ymm20, %xmm20
// CHECK: encoding: [0x62,0xa1,0xfd,0x28,0x5a,0xe4]
vcvtpd2ps %ymm20, %xmm20
// CHECK: vcvtpd2ps %ymm20, %xmm20 {%k6}
// CHECK: encoding: [0x62,0xa1,0xfd,0x2e,0x5a,0xe4]
vcvtpd2ps %ymm20, %xmm20 {%k6}
// CHECK: vcvtpd2ps %ymm20, %xmm20 {%k6} {z}
// CHECK: encoding: [0x62,0xa1,0xfd,0xae,0x5a,0xe4]
vcvtpd2ps %ymm20, %xmm20 {%k6} {z}
// CHECK: vcvtpd2psy (%rcx), %xmm20
// CHECK: encoding: [0x62,0xe1,0xfd,0x28,0x5a,0x21]
vcvtpd2psy (%rcx), %xmm20
// CHECK: vcvtpd2psy 291(%rax,%r14,8), %xmm20
// CHECK: encoding: [0x62,0xa1,0xfd,0x28,0x5a,0xa4,0xf0,0x23,0x01,0x00,0x00]
vcvtpd2psy 291(%rax,%r14,8), %xmm20
// CHECK: vcvtpd2ps (%rcx){1to4}, %xmm20
// CHECK: encoding: [0x62,0xe1,0xfd,0x38,0x5a,0x21]
vcvtpd2ps (%rcx){1to4}, %xmm20
// CHECK: vcvtpd2psy 4064(%rdx), %xmm20
// CHECK: encoding: [0x62,0xe1,0xfd,0x28,0x5a,0x62,0x7f]
vcvtpd2psy 4064(%rdx), %xmm20
// CHECK: vcvtpd2psy 4096(%rdx), %xmm20
// CHECK: encoding: [0x62,0xe1,0xfd,0x28,0x5a,0xa2,0x00,0x10,0x00,0x00]
vcvtpd2psy 4096(%rdx), %xmm20
// CHECK: vcvtpd2psy -4096(%rdx), %xmm20
// CHECK: encoding: [0x62,0xe1,0xfd,0x28,0x5a,0x62,0x80]
vcvtpd2psy -4096(%rdx), %xmm20
// CHECK: vcvtpd2psy -4128(%rdx), %xmm20
// CHECK: encoding: [0x62,0xe1,0xfd,0x28,0x5a,0xa2,0xe0,0xef,0xff,0xff]
vcvtpd2psy -4128(%rdx), %xmm20
// CHECK: vcvtpd2ps 1016(%rdx){1to4}, %xmm20
// CHECK: encoding: [0x62,0xe1,0xfd,0x38,0x5a,0x62,0x7f]
vcvtpd2ps 1016(%rdx){1to4}, %xmm20
// CHECK: vcvtpd2ps 1024(%rdx){1to4}, %xmm20
// CHECK: encoding: [0x62,0xe1,0xfd,0x38,0x5a,0xa2,0x00,0x04,0x00,0x00]
vcvtpd2ps 1024(%rdx){1to4}, %xmm20
// CHECK: vcvtpd2ps -1024(%rdx){1to4}, %xmm20
// CHECK: encoding: [0x62,0xe1,0xfd,0x38,0x5a,0x62,0x80]
vcvtpd2ps -1024(%rdx){1to4}, %xmm20
// CHECK: vcvtpd2ps -1032(%rdx){1to4}, %xmm20
// CHECK: encoding: [0x62,0xe1,0xfd,0x38,0x5a,0xa2,0xf8,0xfb,0xff,0xff]
vcvtpd2ps -1032(%rdx){1to4}, %xmm20
// CHECK: vcvttpd2udq %xmm26, %xmm23
// CHECK: encoding: [0x62,0x81,0xfc,0x08,0x78,0xfa]
vcvttpd2udq %xmm26, %xmm23
// CHECK: vcvttpd2udq %xmm26, %xmm23 {%k2}
// CHECK: encoding: [0x62,0x81,0xfc,0x0a,0x78,0xfa]
vcvttpd2udq %xmm26, %xmm23 {%k2}
// CHECK: vcvttpd2udq %xmm26, %xmm23 {%k2} {z}
// CHECK: encoding: [0x62,0x81,0xfc,0x8a,0x78,0xfa]
vcvttpd2udq %xmm26, %xmm23 {%k2} {z}
// CHECK: vcvttpd2udqx (%rcx), %xmm23
// CHECK: encoding: [0x62,0xe1,0xfc,0x08,0x78,0x39]
vcvttpd2udqx (%rcx), %xmm23
// CHECK: vcvttpd2udqx 291(%rax,%r14,8), %xmm23
// CHECK: encoding: [0x62,0xa1,0xfc,0x08,0x78,0xbc,0xf0,0x23,0x01,0x00,0x00]
vcvttpd2udqx 291(%rax,%r14,8), %xmm23
// CHECK: vcvttpd2udq (%rcx){1to2}, %xmm23
// CHECK: encoding: [0x62,0xe1,0xfc,0x18,0x78,0x39]
vcvttpd2udq (%rcx){1to2}, %xmm23
// CHECK: vcvttpd2udqx 2032(%rdx), %xmm23
// CHECK: encoding: [0x62,0xe1,0xfc,0x08,0x78,0x7a,0x7f]
vcvttpd2udqx 2032(%rdx), %xmm23
// CHECK: vcvttpd2udqx 2048(%rdx), %xmm23
// CHECK: encoding: [0x62,0xe1,0xfc,0x08,0x78,0xba,0x00,0x08,0x00,0x00]
vcvttpd2udqx 2048(%rdx), %xmm23
// CHECK: vcvttpd2udqx -2048(%rdx), %xmm23
// CHECK: encoding: [0x62,0xe1,0xfc,0x08,0x78,0x7a,0x80]
vcvttpd2udqx -2048(%rdx), %xmm23
// CHECK: vcvttpd2udqx -2064(%rdx), %xmm23
// CHECK: encoding: [0x62,0xe1,0xfc,0x08,0x78,0xba,0xf0,0xf7,0xff,0xff]
vcvttpd2udqx -2064(%rdx), %xmm23
// CHECK: vcvttpd2udq 1016(%rdx){1to2}, %xmm23
// CHECK: encoding: [0x62,0xe1,0xfc,0x18,0x78,0x7a,0x7f]
vcvttpd2udq 1016(%rdx){1to2}, %xmm23
// CHECK: vcvttpd2udq 1024(%rdx){1to2}, %xmm23
// CHECK: encoding: [0x62,0xe1,0xfc,0x18,0x78,0xba,0x00,0x04,0x00,0x00]
vcvttpd2udq 1024(%rdx){1to2}, %xmm23
// CHECK: vcvttpd2udq -1024(%rdx){1to2}, %xmm23
// CHECK: encoding: [0x62,0xe1,0xfc,0x18,0x78,0x7a,0x80]
vcvttpd2udq -1024(%rdx){1to2}, %xmm23
// CHECK: vcvttpd2udq -1032(%rdx){1to2}, %xmm23
// CHECK: encoding: [0x62,0xe1,0xfc,0x18,0x78,0xba,0xf8,0xfb,0xff,0xff]
vcvttpd2udq -1032(%rdx){1to2}, %xmm23
// CHECK: vcvttpd2udq %ymm23, %xmm28
// CHECK: encoding: [0x62,0x21,0xfc,0x28,0x78,0xe7]
vcvttpd2udq %ymm23, %xmm28
// CHECK: vcvttpd2udq %ymm23, %xmm28 {%k6}
// CHECK: encoding: [0x62,0x21,0xfc,0x2e,0x78,0xe7]
vcvttpd2udq %ymm23, %xmm28 {%k6}
// CHECK: vcvttpd2udq %ymm23, %xmm28 {%k6} {z}
// CHECK: encoding: [0x62,0x21,0xfc,0xae,0x78,0xe7]
vcvttpd2udq %ymm23, %xmm28 {%k6} {z}
// CHECK: vcvttpd2udqy (%rcx), %xmm28
// CHECK: encoding: [0x62,0x61,0xfc,0x28,0x78,0x21]
vcvttpd2udqy (%rcx), %xmm28
// CHECK: vcvttpd2udqy 291(%rax,%r14,8), %xmm28
// CHECK: encoding: [0x62,0x21,0xfc,0x28,0x78,0xa4,0xf0,0x23,0x01,0x00,0x00]
vcvttpd2udqy 291(%rax,%r14,8), %xmm28
// CHECK: vcvttpd2udq (%rcx){1to4}, %xmm28
// CHECK: encoding: [0x62,0x61,0xfc,0x38,0x78,0x21]
vcvttpd2udq (%rcx){1to4}, %xmm28
// CHECK: vcvttpd2udqy 4064(%rdx), %xmm28
// CHECK: encoding: [0x62,0x61,0xfc,0x28,0x78,0x62,0x7f]
vcvttpd2udqy 4064(%rdx), %xmm28
// CHECK: vcvttpd2udqy 4096(%rdx), %xmm28
// CHECK: encoding: [0x62,0x61,0xfc,0x28,0x78,0xa2,0x00,0x10,0x00,0x00]
vcvttpd2udqy 4096(%rdx), %xmm28
// CHECK: vcvttpd2udqy -4096(%rdx), %xmm28
// CHECK: encoding: [0x62,0x61,0xfc,0x28,0x78,0x62,0x80]
vcvttpd2udqy -4096(%rdx), %xmm28
// CHECK: vcvttpd2udqy -4128(%rdx), %xmm28
// CHECK: encoding: [0x62,0x61,0xfc,0x28,0x78,0xa2,0xe0,0xef,0xff,0xff]
vcvttpd2udqy -4128(%rdx), %xmm28
// CHECK: vcvttpd2udq 1016(%rdx){1to4}, %xmm28
// CHECK: encoding: [0x62,0x61,0xfc,0x38,0x78,0x62,0x7f]
vcvttpd2udq 1016(%rdx){1to4}, %xmm28
// CHECK: vcvttpd2udq 1024(%rdx){1to4}, %xmm28
// CHECK: encoding: [0x62,0x61,0xfc,0x38,0x78,0xa2,0x00,0x04,0x00,0x00]
vcvttpd2udq 1024(%rdx){1to4}, %xmm28
// CHECK: vcvttpd2udq -1024(%rdx){1to4}, %xmm28
// CHECK: encoding: [0x62,0x61,0xfc,0x38,0x78,0x62,0x80]
vcvttpd2udq -1024(%rdx){1to4}, %xmm28
// CHECK: vcvttpd2udq -1032(%rdx){1to4}, %xmm28
// CHECK: encoding: [0x62,0x61,0xfc,0x38,0x78,0xa2,0xf8,0xfb,0xff,0xff]
vcvttpd2udq -1032(%rdx){1to4}, %xmm28