diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 102a3f4f589..5c8b089b4dd 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -3831,6 +3831,28 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + + // Masked packed FP scale (vscalefpd/vscalefps). Operands: two sources, a + // pass-through vector and a write mask; the 512-bit forms add an i32 rounding mode. + def int_x86_avx512_mask_scalef_pd_128 : GCCBuiltin<"__builtin_ia32_scalefpd128_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, + llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_scalef_pd_256 : GCCBuiltin<"__builtin_ia32_scalefpd256_mask">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, + llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_scalef_pd_512 : GCCBuiltin<"__builtin_ia32_scalefpd512_mask">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, + llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_scalef_ps_128 : GCCBuiltin<"__builtin_ia32_scalefps128_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, + llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_scalef_ps_256 : GCCBuiltin<"__builtin_ia32_scalefps256_mask">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, + llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_scalef_ps_512 : GCCBuiltin<"__builtin_ia32_scalefps512_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, + llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_sqrt_ss : GCCBuiltin<"__builtin_ia32_sqrtrndss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b2e08c28f48..d36bcd98e1d 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -15084,6 +15084,23 @@ static SDValue 
LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget Src1,Src2), Mask, PassThru, Subtarget, DAG); } + case INTR_TYPE_2OP_MASK_RM: { + SDValue Src1 = Op.getOperand(1); + SDValue Src2 = Op.getOperand(2); + SDValue PassThru = Op.getOperand(3); + SDValue Mask = Op.getOperand(4); + // Intrinsics of this type come in two forms: with and without a rounding mode. + // If the intrinsic has a rounding-mode operand (6 operands in total) we use it; + // otherwise we set the rounding mode to "current". + SDValue Rnd; + if (Op.getNumOperands() == 6) + Rnd = Op.getOperand(5); + else + Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32); + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, + Src1, Src2, Rnd), + Mask, PassThru, Subtarget, DAG); + } case INTR_TYPE_3OP_MASK: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); @@ -18490,9 +18507,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FDIV_RND: return "X86ISD::FDIV_RND"; case X86ISD::FSQRT_RND: return "X86ISD::FSQRT_RND"; case X86ISD::FGETEXP_RND: return "X86ISD::FGETEXP_RND"; + case X86ISD::SCALEF: return "X86ISD::SCALEF"; case X86ISD::ADDS: return "X86ISD::ADDS"; case X86ISD::SUBS: return "X86ISD::SUBS"; - case X86ISD::AVG: return "X86ISD::AVG"; + case X86ISD::AVG: return "X86ISD::AVG"; case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND"; case X86ISD::UINT_TO_FP_RND: return "X86ISD::UINT_TO_FP_RND"; } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index dd6aecdab7e..372121b39ad 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -211,7 +211,8 @@ namespace llvm { // FP vector get exponent FGETEXP_RND, - + // FP Scale + SCALEF, // Integer add/sub with unsigned saturation. 
ADDUS, SUBUS, diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index d2197a20b12..79f0ceb1315 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -3436,7 +3436,7 @@ multiclass avx512_fp_packed opc, string OpcodeStr, SDNode OpNode, } multiclass avx512_fp_round_packed opc, string OpcodeStr, SDNode OpNodeRnd, - X86VectorVTInfo _, bit IsCommutable> { + X86VectorVTInfo _> { defm rb: AVX512_maskable opc, string OpcodeStr, SDNode OpNodeRn multiclass avx512_fp_sae_packed opc, string OpcodeStr, SDNode OpNodeRnd, - X86VectorVTInfo _, bit IsCommutable> { + X86VectorVTInfo _> { defm rb: AVX512_maskable opc, string OpcodeStr, SDNode OpNode, } multiclass avx512_fp_binop_p_round opc, string OpcodeStr, SDNode OpNodeRnd> { - defm PSZ : avx512_fp_round_packed, + defm PSZ : avx512_fp_round_packed, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_fp_round_packed, + defm PDZ : avx512_fp_round_packed, EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; } multiclass avx512_fp_binop_p_sae opc, string OpcodeStr, SDNode OpNodeRnd> { - defm PSZ : avx512_fp_sae_packed, + defm PSZ : avx512_fp_sae_packed, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_fp_sae_packed, + defm PDZ : avx512_fp_sae_packed, EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; } @@ -3513,6 +3513,48 @@ let Predicates = [HasDQI] in { defm VXOR : avx512_fp_binop_p<0x57, "vxor", X86fxor, 1>; } +multiclass avx512_fp_scalef_p opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rr: AVX512_maskable, EVEX_4V; + let mayLoad = 1 in { + defm rm: AVX512_maskable, EVEX_4V; + defm rmb: AVX512_maskable, + EVEX_4V, EVEX_B; + }//let mayLoad = 1 +} + +multiclass avx512_fp_scalef_all opc, string OpcodeStr, SDNode OpNode> { + defm PSZ : avx512_fp_scalef_p, + avx512_fp_round_packed, + EVEX_V512, EVEX_CD8<32, CD8VF>; + defm PDZ : avx512_fp_scalef_p, + avx512_fp_round_packed, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + // Define only if AVX512VL feature is present. 
+ let Predicates = [HasVLX] in { + defm PSZ128 : avx512_fp_scalef_p, + EVEX_V128, EVEX_CD8<32, CD8VF>; + defm PSZ256 : avx512_fp_scalef_p, + EVEX_V256, EVEX_CD8<32, CD8VF>; + defm PDZ128 : avx512_fp_scalef_p, + EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>; + defm PDZ256 : avx512_fp_scalef_p, + EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; + } +} +defm VSCALEF : avx512_fp_scalef_all<0x2C, "vscalef", X86scalef>, T8PD; + //===----------------------------------------------------------------------===// // AVX-512 VPTESTM instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index f1fb9b1c6f4..072bc050aad 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -311,6 +311,7 @@ def X86fsubRnd : SDNode<"X86ISD::FSUB_RND", SDTFPBinOpRound>; def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>; def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>; def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>; +def X86scalef : SDNode<"X86ISD::SCALEF", SDTFPBinOpRound>; def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>; def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>; def X86fgetexpRnd : SDNode<"X86ISD::FGETEXP_RND", SDTFPUnaryOpRound>; diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index c037b7b3d82..d8c5dd918ac 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -21,7 +21,7 @@ enum IntrinsicType { GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX, INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI, - INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, + INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM, INTR_TYPE_3OP_MASK, FMA_OP_MASK, FMA_OP_MASKZ, VPERM_3OP_MASK, VPERM_3OP_MASKZ, 
INTR_TYPE_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, @@ -668,6 +668,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::RNDSCALE, 0), X86_INTRINSIC_DATA(avx512_mask_rndscale_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RNDSCALE, 0), + X86_INTRINSIC_DATA(avx512_mask_scalef_pd_128, INTR_TYPE_2OP_MASK_RM, + X86ISD::SCALEF, 0), + X86_INTRINSIC_DATA(avx512_mask_scalef_pd_256, INTR_TYPE_2OP_MASK_RM, + X86ISD::SCALEF, 0), + X86_INTRINSIC_DATA(avx512_mask_scalef_pd_512, INTR_TYPE_2OP_MASK_RM, + X86ISD::SCALEF, 0), + X86_INTRINSIC_DATA(avx512_mask_scalef_ps_128, INTR_TYPE_2OP_MASK_RM, + X86ISD::SCALEF, 0), + X86_INTRINSIC_DATA(avx512_mask_scalef_ps_256, INTR_TYPE_2OP_MASK_RM, + X86ISD::SCALEF, 0), + X86_INTRINSIC_DATA(avx512_mask_scalef_ps_512, INTR_TYPE_2OP_MASK_RM, + X86ISD::SCALEF, 0), X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0), X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0), X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_512, INTR_TYPE_1OP_MASK_RM, ISD::FSQRT, diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index dc3dc0f72d5..b9f490b8a39 100644 --- a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -3144,3 +3144,31 @@ define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 %res2 = add <16 x i32> %res, %res1 ret <16 x i32> %res2 } + +declare <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) +; The rounding-mode operand must reach the instruction: 3 -> rz-sae (masked call), 0 -> rn-sae. +; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_pd_512 +; CHECK-NOT: call +; CHECK: kmov +; CHECK-DAG: vscalefpd {rz-sae}{{.*}}{%k1} +; CHECK-DAG: vscalefpd {rn-sae} +define <8 x double>@test_int_x86_avx512_mask_scalef_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) { + %res = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 3) + %res1 = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> 
%x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0) + %res2 = fadd <8 x double> %res, %res1 + ret <8 x double> %res2 +} + +declare <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) +; The rounding-mode operand must reach the instruction: 2 -> ru-sae (masked call), 0 -> rn-sae. +; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ps_512 +; CHECK-NOT: call +; CHECK: kmov +; CHECK-DAG: vscalefps {ru-sae}{{.*}}{%k1} +; CHECK-DAG: vscalefps {rn-sae} +define <16 x float>@test_int_x86_avx512_mask_scalef_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) { + %res = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 2) + %res1 = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0) + %res2 = fadd <16 x float> %res, %res1 + ret <16 x float> %res2 +} diff --git a/test/CodeGen/X86/avx512vl-intrinsics.ll b/test/CodeGen/X86/avx512vl-intrinsics.ll index b2da994a868..fb7c93dc53b 100644 --- a/test/CodeGen/X86/avx512vl-intrinsics.ll +++ b/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -2954,3 +2954,53 @@ define <8 x i32>@test_int_x86_avx512_mask_pabs_d_256(<8 x i32> %x0, <8 x i32> %x ret <8 x i32> %res2 } + +declare <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) + +; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_pd_128 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vscalefpd{{.*}}{%k1} +define <2 x double>@test_int_x86_avx512_mask_scalef_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { + %res = call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) + %res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) + %res2 = fadd <2 x double> %res, %res1 + ret <2 x double> %res2 +} + +declare <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) + +; CHECK-LABEL: 
@test_int_x86_avx512_mask_scalef_pd_256 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vscalefpd{{.*}}{%k1} +define <4 x double>@test_int_x86_avx512_mask_scalef_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { + %res = call <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) + %res1 = call <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) + %res2 = fadd <4 x double> %res, %res1 + ret <4 x double> %res2 +} + +declare <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) +; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ps_128 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vscalefps{{.*}}{%k1} +define <4 x float>@test_int_x86_avx512_mask_scalef_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { + %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) + %res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) + %res2 = fadd <4 x float> %res, %res1 + ret <4 x float> %res2 +} + +declare <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) +; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ps_256 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vscalefps{{.*}}{%k1} +define <8 x float>@test_int_x86_avx512_mask_scalef_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { + %res = call <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) + %res1 = call <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) + %res2 = fadd <8 x float> %res, %res1 + ret <8 x float> %res2 +} diff --git a/test/MC/X86/avx512-encodings.s b/test/MC/X86/avx512-encodings.s index 32787a03499..1ab484d2a80 100644 --- 
a/test/MC/X86/avx512-encodings.s +++ b/test/MC/X86/avx512-encodings.s @@ -9905,3 +9905,146 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2 // CHECK: vpscatterqq %zmm14, 1024(%rcx,%zmm20,4) {%k1} // CHECK: encoding: [0x62,0x72,0xfd,0x41,0xa1,0xb4,0xa1,0x00,0x04,0x00,0x00] vpscatterqq %zmm14, 1024(%rcx,%zmm20,4) {%k1} +// CHECK: vscalefpd %zmm28, %zmm26, %zmm26 +// CHECK: encoding: [0x62,0x02,0xad,0x40,0x2c,0xd4] + vscalefpd %zmm28, %zmm26, %zmm26 + +// CHECK: vscalefpd %zmm28, %zmm26, %zmm26 {%k5} +// CHECK: encoding: [0x62,0x02,0xad,0x45,0x2c,0xd4] + vscalefpd %zmm28, %zmm26, %zmm26 {%k5} + +// CHECK: vscalefpd %zmm28, %zmm26, %zmm26 {%k5} {z} +// CHECK: encoding: [0x62,0x02,0xad,0xc5,0x2c,0xd4] + vscalefpd %zmm28, %zmm26, %zmm26 {%k5} {z} + +// CHECK: vscalefpd {rn-sae}, %zmm28, %zmm26, %zmm26 +// CHECK: encoding: [0x62,0x02,0xad,0x10,0x2c,0xd4] + vscalefpd {rn-sae}, %zmm28, %zmm26, %zmm26 + +// CHECK: vscalefpd {ru-sae}, %zmm28, %zmm26, %zmm26 +// CHECK: encoding: [0x62,0x02,0xad,0x50,0x2c,0xd4] + vscalefpd {ru-sae}, %zmm28, %zmm26, %zmm26 + +// CHECK: vscalefpd {rd-sae}, %zmm28, %zmm26, %zmm26 +// CHECK: encoding: [0x62,0x02,0xad,0x30,0x2c,0xd4] + vscalefpd {rd-sae}, %zmm28, %zmm26, %zmm26 + +// CHECK: vscalefpd {rz-sae}, %zmm28, %zmm26, %zmm26 +// CHECK: encoding: [0x62,0x02,0xad,0x70,0x2c,0xd4] + vscalefpd {rz-sae}, %zmm28, %zmm26, %zmm26 + +// CHECK: vscalefpd (%rcx), %zmm26, %zmm26 +// CHECK: encoding: [0x62,0x62,0xad,0x40,0x2c,0x11] + vscalefpd (%rcx), %zmm26, %zmm26 + +// CHECK: vscalefpd 291(%rax,%r14,8), %zmm26, %zmm26 +// CHECK: encoding: [0x62,0x22,0xad,0x40,0x2c,0x94,0xf0,0x23,0x01,0x00,0x00] + vscalefpd 291(%rax,%r14,8), %zmm26, %zmm26 + +// CHECK: vscalefpd (%rcx){1to8}, %zmm26, %zmm26 +// CHECK: encoding: [0x62,0x62,0xad,0x50,0x2c,0x11] + vscalefpd (%rcx){1to8}, %zmm26, %zmm26 + +// CHECK: vscalefpd 8128(%rdx), %zmm26, %zmm26 +// CHECK: encoding: [0x62,0x62,0xad,0x40,0x2c,0x52,0x7f] + vscalefpd 8128(%rdx), %zmm26, %zmm26 + +// CHECK: vscalefpd 8192(%rdx), 
%zmm26, %zmm26 +// CHECK: encoding: [0x62,0x62,0xad,0x40,0x2c,0x92,0x00,0x20,0x00,0x00] + vscalefpd 8192(%rdx), %zmm26, %zmm26 + +// CHECK: vscalefpd -8192(%rdx), %zmm26, %zmm26 +// CHECK: encoding: [0x62,0x62,0xad,0x40,0x2c,0x52,0x80] + vscalefpd -8192(%rdx), %zmm26, %zmm26 + +// CHECK: vscalefpd -8256(%rdx), %zmm26, %zmm26 +// CHECK: encoding: [0x62,0x62,0xad,0x40,0x2c,0x92,0xc0,0xdf,0xff,0xff] + vscalefpd -8256(%rdx), %zmm26, %zmm26 + +// CHECK: vscalefpd 1016(%rdx){1to8}, %zmm26, %zmm26 +// CHECK: encoding: [0x62,0x62,0xad,0x50,0x2c,0x52,0x7f] + vscalefpd 1016(%rdx){1to8}, %zmm26, %zmm26 + +// CHECK: vscalefpd 1024(%rdx){1to8}, %zmm26, %zmm26 +// CHECK: encoding: [0x62,0x62,0xad,0x50,0x2c,0x92,0x00,0x04,0x00,0x00] + vscalefpd 1024(%rdx){1to8}, %zmm26, %zmm26 + +// CHECK: vscalefpd -1024(%rdx){1to8}, %zmm26, %zmm26 +// CHECK: encoding: [0x62,0x62,0xad,0x50,0x2c,0x52,0x80] + vscalefpd -1024(%rdx){1to8}, %zmm26, %zmm26 + +// CHECK: vscalefpd -1032(%rdx){1to8}, %zmm26, %zmm26 +// CHECK: encoding: [0x62,0x62,0xad,0x50,0x2c,0x92,0xf8,0xfb,0xff,0xff] + vscalefpd -1032(%rdx){1to8}, %zmm26, %zmm26 + +// CHECK: vscalefps %zmm18, %zmm6, %zmm19 +// CHECK: encoding: [0x62,0xa2,0x4d,0x48,0x2c,0xda] + vscalefps %zmm18, %zmm6, %zmm19 + +// CHECK: vscalefps %zmm18, %zmm6, %zmm19 {%k6} +// CHECK: encoding: [0x62,0xa2,0x4d,0x4e,0x2c,0xda] + vscalefps %zmm18, %zmm6, %zmm19 {%k6} + +// CHECK: vscalefps %zmm18, %zmm6, %zmm19 {%k6} {z} +// CHECK: encoding: [0x62,0xa2,0x4d,0xce,0x2c,0xda] + vscalefps %zmm18, %zmm6, %zmm19 {%k6} {z} + +// CHECK: vscalefps {rn-sae}, %zmm18, %zmm6, %zmm19 +// CHECK: encoding: [0x62,0xa2,0x4d,0x18,0x2c,0xda] + vscalefps {rn-sae}, %zmm18, %zmm6, %zmm19 + +// CHECK: vscalefps {ru-sae}, %zmm18, %zmm6, %zmm19 +// CHECK: encoding: [0x62,0xa2,0x4d,0x58,0x2c,0xda] + vscalefps {ru-sae}, %zmm18, %zmm6, %zmm19 + +// CHECK: vscalefps {rd-sae}, %zmm18, %zmm6, %zmm19 +// CHECK: encoding: [0x62,0xa2,0x4d,0x38,0x2c,0xda] + vscalefps {rd-sae}, %zmm18, %zmm6, %zmm19 + +// 
CHECK: vscalefps {rz-sae}, %zmm18, %zmm6, %zmm19 +// CHECK: encoding: [0x62,0xa2,0x4d,0x78,0x2c,0xda] + vscalefps {rz-sae}, %zmm18, %zmm6, %zmm19 + +// CHECK: vscalefps (%rcx), %zmm6, %zmm19 +// CHECK: encoding: [0x62,0xe2,0x4d,0x48,0x2c,0x19] + vscalefps (%rcx), %zmm6, %zmm19 + +// CHECK: vscalefps 291(%rax,%r14,8), %zmm6, %zmm19 +// CHECK: encoding: [0x62,0xa2,0x4d,0x48,0x2c,0x9c,0xf0,0x23,0x01,0x00,0x00] + vscalefps 291(%rax,%r14,8), %zmm6, %zmm19 + +// CHECK: vscalefps (%rcx){1to16}, %zmm6, %zmm19 +// CHECK: encoding: [0x62,0xe2,0x4d,0x58,0x2c,0x19] + vscalefps (%rcx){1to16}, %zmm6, %zmm19 + +// CHECK: vscalefps 8128(%rdx), %zmm6, %zmm19 +// CHECK: encoding: [0x62,0xe2,0x4d,0x48,0x2c,0x5a,0x7f] + vscalefps 8128(%rdx), %zmm6, %zmm19 + +// CHECK: vscalefps 8192(%rdx), %zmm6, %zmm19 +// CHECK: encoding: [0x62,0xe2,0x4d,0x48,0x2c,0x9a,0x00,0x20,0x00,0x00] + vscalefps 8192(%rdx), %zmm6, %zmm19 + +// CHECK: vscalefps -8192(%rdx), %zmm6, %zmm19 +// CHECK: encoding: [0x62,0xe2,0x4d,0x48,0x2c,0x5a,0x80] + vscalefps -8192(%rdx), %zmm6, %zmm19 + +// CHECK: vscalefps -8256(%rdx), %zmm6, %zmm19 +// CHECK: encoding: [0x62,0xe2,0x4d,0x48,0x2c,0x9a,0xc0,0xdf,0xff,0xff] + vscalefps -8256(%rdx), %zmm6, %zmm19 + +// CHECK: vscalefps 508(%rdx){1to16}, %zmm6, %zmm19 +// CHECK: encoding: [0x62,0xe2,0x4d,0x58,0x2c,0x5a,0x7f] + vscalefps 508(%rdx){1to16}, %zmm6, %zmm19 + +// CHECK: vscalefps 512(%rdx){1to16}, %zmm6, %zmm19 +// CHECK: encoding: [0x62,0xe2,0x4d,0x58,0x2c,0x9a,0x00,0x02,0x00,0x00] + vscalefps 512(%rdx){1to16}, %zmm6, %zmm19 + +// CHECK: vscalefps -512(%rdx){1to16}, %zmm6, %zmm19 +// CHECK: encoding: [0x62,0xe2,0x4d,0x58,0x2c,0x5a,0x80] + vscalefps -512(%rdx){1to16}, %zmm6, %zmm19 + +// CHECK: vscalefps -516(%rdx){1to16}, %zmm6, %zmm19 +// CHECK: encoding: [0x62,0xe2,0x4d,0x58,0x2c,0x9a,0xfc,0xfd,0xff,0xff] + vscalefps -516(%rdx){1to16}, %zmm6, %zmm19 diff --git a/test/MC/X86/x86-64-avx512f_vl.s b/test/MC/X86/x86-64-avx512f_vl.s index 1381b2e76e1..5d80beb081e 100644 --- 
a/test/MC/X86/x86-64-avx512f_vl.s +++ b/test/MC/X86/x86-64-avx512f_vl.s @@ -12028,3 +12028,227 @@ vaddpd {rz-sae}, %zmm2, %zmm1, %zmm1 // CHECK: vpermt2pd -1032(%rdx){1to4}, %ymm23, %ymm17 // CHECK: encoding: [0x62,0xe2,0xc5,0x30,0x7f,0x8a,0xf8,0xfb,0xff,0xff] vpermt2pd -1032(%rdx){1to4}, %ymm23, %ymm17 + +// CHECK: vscalefpd %xmm17, %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x22,0xf5,0x00,0x2c,0xc1] + vscalefpd %xmm17, %xmm17, %xmm24 + +// CHECK: vscalefpd %xmm17, %xmm17, %xmm24 {%k2} +// CHECK: encoding: [0x62,0x22,0xf5,0x02,0x2c,0xc1] + vscalefpd %xmm17, %xmm17, %xmm24 {%k2} + +// CHECK: vscalefpd %xmm17, %xmm17, %xmm24 {%k2} {z} +// CHECK: encoding: [0x62,0x22,0xf5,0x82,0x2c,0xc1] + vscalefpd %xmm17, %xmm17, %xmm24 {%k2} {z} + +// CHECK: vscalefpd (%rcx), %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x62,0xf5,0x00,0x2c,0x01] + vscalefpd (%rcx), %xmm17, %xmm24 + +// CHECK: vscalefpd 291(%rax,%r14,8), %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x22,0xf5,0x00,0x2c,0x84,0xf0,0x23,0x01,0x00,0x00] + vscalefpd 291(%rax,%r14,8), %xmm17, %xmm24 + +// CHECK: vscalefpd (%rcx){1to2}, %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x62,0xf5,0x10,0x2c,0x01] + vscalefpd (%rcx){1to2}, %xmm17, %xmm24 + +// CHECK: vscalefpd 2032(%rdx), %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x62,0xf5,0x00,0x2c,0x42,0x7f] + vscalefpd 2032(%rdx), %xmm17, %xmm24 + +// CHECK: vscalefpd 2048(%rdx), %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x62,0xf5,0x00,0x2c,0x82,0x00,0x08,0x00,0x00] + vscalefpd 2048(%rdx), %xmm17, %xmm24 + +// CHECK: vscalefpd -2048(%rdx), %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x62,0xf5,0x00,0x2c,0x42,0x80] + vscalefpd -2048(%rdx), %xmm17, %xmm24 + +// CHECK: vscalefpd -2064(%rdx), %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x62,0xf5,0x00,0x2c,0x82,0xf0,0xf7,0xff,0xff] + vscalefpd -2064(%rdx), %xmm17, %xmm24 + +// CHECK: vscalefpd 1016(%rdx){1to2}, %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x62,0xf5,0x10,0x2c,0x42,0x7f] + vscalefpd 1016(%rdx){1to2}, %xmm17, %xmm24 + +// CHECK: 
vscalefpd 1024(%rdx){1to2}, %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x62,0xf5,0x10,0x2c,0x82,0x00,0x04,0x00,0x00] + vscalefpd 1024(%rdx){1to2}, %xmm17, %xmm24 + +// CHECK: vscalefpd -1024(%rdx){1to2}, %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x62,0xf5,0x10,0x2c,0x42,0x80] + vscalefpd -1024(%rdx){1to2}, %xmm17, %xmm24 + +// CHECK: vscalefpd -1032(%rdx){1to2}, %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x62,0xf5,0x10,0x2c,0x82,0xf8,0xfb,0xff,0xff] + vscalefpd -1032(%rdx){1to2}, %xmm17, %xmm24 + +// CHECK: vscalefpd %ymm26, %ymm20, %ymm29 +// CHECK: encoding: [0x62,0x02,0xdd,0x20,0x2c,0xea] + vscalefpd %ymm26, %ymm20, %ymm29 + +// CHECK: vscalefpd %ymm26, %ymm20, %ymm29 {%k2} +// CHECK: encoding: [0x62,0x02,0xdd,0x22,0x2c,0xea] + vscalefpd %ymm26, %ymm20, %ymm29 {%k2} + +// CHECK: vscalefpd %ymm26, %ymm20, %ymm29 {%k2} {z} +// CHECK: encoding: [0x62,0x02,0xdd,0xa2,0x2c,0xea] + vscalefpd %ymm26, %ymm20, %ymm29 {%k2} {z} + +// CHECK: vscalefpd (%rcx), %ymm20, %ymm29 +// CHECK: encoding: [0x62,0x62,0xdd,0x20,0x2c,0x29] + vscalefpd (%rcx), %ymm20, %ymm29 + +// CHECK: vscalefpd 291(%rax,%r14,8), %ymm20, %ymm29 +// CHECK: encoding: [0x62,0x22,0xdd,0x20,0x2c,0xac,0xf0,0x23,0x01,0x00,0x00] + vscalefpd 291(%rax,%r14,8), %ymm20, %ymm29 + +// CHECK: vscalefpd (%rcx){1to4}, %ymm20, %ymm29 +// CHECK: encoding: [0x62,0x62,0xdd,0x30,0x2c,0x29] + vscalefpd (%rcx){1to4}, %ymm20, %ymm29 + +// CHECK: vscalefpd 4064(%rdx), %ymm20, %ymm29 +// CHECK: encoding: [0x62,0x62,0xdd,0x20,0x2c,0x6a,0x7f] + vscalefpd 4064(%rdx), %ymm20, %ymm29 + +// CHECK: vscalefpd 4096(%rdx), %ymm20, %ymm29 +// CHECK: encoding: [0x62,0x62,0xdd,0x20,0x2c,0xaa,0x00,0x10,0x00,0x00] + vscalefpd 4096(%rdx), %ymm20, %ymm29 + +// CHECK: vscalefpd -4096(%rdx), %ymm20, %ymm29 +// CHECK: encoding: [0x62,0x62,0xdd,0x20,0x2c,0x6a,0x80] + vscalefpd -4096(%rdx), %ymm20, %ymm29 + +// CHECK: vscalefpd -4128(%rdx), %ymm20, %ymm29 +// CHECK: encoding: [0x62,0x62,0xdd,0x20,0x2c,0xaa,0xe0,0xef,0xff,0xff] + vscalefpd -4128(%rdx), 
%ymm20, %ymm29 + +// CHECK: vscalefpd 1016(%rdx){1to4}, %ymm20, %ymm29 +// CHECK: encoding: [0x62,0x62,0xdd,0x30,0x2c,0x6a,0x7f] + vscalefpd 1016(%rdx){1to4}, %ymm20, %ymm29 + +// CHECK: vscalefpd 1024(%rdx){1to4}, %ymm20, %ymm29 +// CHECK: encoding: [0x62,0x62,0xdd,0x30,0x2c,0xaa,0x00,0x04,0x00,0x00] + vscalefpd 1024(%rdx){1to4}, %ymm20, %ymm29 + +// CHECK: vscalefpd -1024(%rdx){1to4}, %ymm20, %ymm29 +// CHECK: encoding: [0x62,0x62,0xdd,0x30,0x2c,0x6a,0x80] + vscalefpd -1024(%rdx){1to4}, %ymm20, %ymm29 + +// CHECK: vscalefpd -1032(%rdx){1to4}, %ymm20, %ymm29 +// CHECK: encoding: [0x62,0x62,0xdd,0x30,0x2c,0xaa,0xf8,0xfb,0xff,0xff] + vscalefpd -1032(%rdx){1to4}, %ymm20, %ymm29 + +// CHECK: vscalefps %xmm22, %xmm27, %xmm19 +// CHECK: encoding: [0x62,0xa2,0x25,0x00,0x2c,0xde] + vscalefps %xmm22, %xmm27, %xmm19 + +// CHECK: vscalefps %xmm22, %xmm27, %xmm19 {%k4} +// CHECK: encoding: [0x62,0xa2,0x25,0x04,0x2c,0xde] + vscalefps %xmm22, %xmm27, %xmm19 {%k4} + +// CHECK: vscalefps %xmm22, %xmm27, %xmm19 {%k4} {z} +// CHECK: encoding: [0x62,0xa2,0x25,0x84,0x2c,0xde] + vscalefps %xmm22, %xmm27, %xmm19 {%k4} {z} + +// CHECK: vscalefps (%rcx), %xmm27, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x25,0x00,0x2c,0x19] + vscalefps (%rcx), %xmm27, %xmm19 + +// CHECK: vscalefps 291(%rax,%r14,8), %xmm27, %xmm19 +// CHECK: encoding: [0x62,0xa2,0x25,0x00,0x2c,0x9c,0xf0,0x23,0x01,0x00,0x00] + vscalefps 291(%rax,%r14,8), %xmm27, %xmm19 + +// CHECK: vscalefps (%rcx){1to4}, %xmm27, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x25,0x10,0x2c,0x19] + vscalefps (%rcx){1to4}, %xmm27, %xmm19 + +// CHECK: vscalefps 2032(%rdx), %xmm27, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x25,0x00,0x2c,0x5a,0x7f] + vscalefps 2032(%rdx), %xmm27, %xmm19 + +// CHECK: vscalefps 2048(%rdx), %xmm27, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x25,0x00,0x2c,0x9a,0x00,0x08,0x00,0x00] + vscalefps 2048(%rdx), %xmm27, %xmm19 + +// CHECK: vscalefps -2048(%rdx), %xmm27, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x25,0x00,0x2c,0x5a,0x80] + 
vscalefps -2048(%rdx), %xmm27, %xmm19 + +// CHECK: vscalefps -2064(%rdx), %xmm27, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x25,0x00,0x2c,0x9a,0xf0,0xf7,0xff,0xff] + vscalefps -2064(%rdx), %xmm27, %xmm19 + +// CHECK: vscalefps 508(%rdx){1to4}, %xmm27, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x25,0x10,0x2c,0x5a,0x7f] + vscalefps 508(%rdx){1to4}, %xmm27, %xmm19 + +// CHECK: vscalefps 512(%rdx){1to4}, %xmm27, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x25,0x10,0x2c,0x9a,0x00,0x02,0x00,0x00] + vscalefps 512(%rdx){1to4}, %xmm27, %xmm19 + +// CHECK: vscalefps -512(%rdx){1to4}, %xmm27, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x25,0x10,0x2c,0x5a,0x80] + vscalefps -512(%rdx){1to4}, %xmm27, %xmm19 + +// CHECK: vscalefps -516(%rdx){1to4}, %xmm27, %xmm19 +// CHECK: encoding: [0x62,0xe2,0x25,0x10,0x2c,0x9a,0xfc,0xfd,0xff,0xff] + vscalefps -516(%rdx){1to4}, %xmm27, %xmm19 + +// CHECK: vscalefps %ymm23, %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x22,0x4d,0x20,0x2c,0xcf] + vscalefps %ymm23, %ymm22, %ymm25 + +// CHECK: vscalefps %ymm23, %ymm22, %ymm25 {%k4} +// CHECK: encoding: [0x62,0x22,0x4d,0x24,0x2c,0xcf] + vscalefps %ymm23, %ymm22, %ymm25 {%k4} + +// CHECK: vscalefps %ymm23, %ymm22, %ymm25 {%k4} {z} +// CHECK: encoding: [0x62,0x22,0x4d,0xa4,0x2c,0xcf] + vscalefps %ymm23, %ymm22, %ymm25 {%k4} {z} + +// CHECK: vscalefps (%rcx), %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x62,0x4d,0x20,0x2c,0x09] + vscalefps (%rcx), %ymm22, %ymm25 + +// CHECK: vscalefps 291(%rax,%r14,8), %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x22,0x4d,0x20,0x2c,0x8c,0xf0,0x23,0x01,0x00,0x00] + vscalefps 291(%rax,%r14,8), %ymm22, %ymm25 + +// CHECK: vscalefps (%rcx){1to8}, %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x62,0x4d,0x30,0x2c,0x09] + vscalefps (%rcx){1to8}, %ymm22, %ymm25 + +// CHECK: vscalefps 4064(%rdx), %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x62,0x4d,0x20,0x2c,0x4a,0x7f] + vscalefps 4064(%rdx), %ymm22, %ymm25 + +// CHECK: vscalefps 4096(%rdx), %ymm22, %ymm25 +// CHECK: encoding: 
[0x62,0x62,0x4d,0x20,0x2c,0x8a,0x00,0x10,0x00,0x00] + vscalefps 4096(%rdx), %ymm22, %ymm25 + +// CHECK: vscalefps -4096(%rdx), %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x62,0x4d,0x20,0x2c,0x4a,0x80] + vscalefps -4096(%rdx), %ymm22, %ymm25 + +// CHECK: vscalefps -4128(%rdx), %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x62,0x4d,0x20,0x2c,0x8a,0xe0,0xef,0xff,0xff] + vscalefps -4128(%rdx), %ymm22, %ymm25 + +// CHECK: vscalefps 508(%rdx){1to8}, %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x62,0x4d,0x30,0x2c,0x4a,0x7f] + vscalefps 508(%rdx){1to8}, %ymm22, %ymm25 + +// CHECK: vscalefps 512(%rdx){1to8}, %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x62,0x4d,0x30,0x2c,0x8a,0x00,0x02,0x00,0x00] + vscalefps 512(%rdx){1to8}, %ymm22, %ymm25 + +// CHECK: vscalefps -512(%rdx){1to8}, %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x62,0x4d,0x30,0x2c,0x4a,0x80] + vscalefps -512(%rdx){1to8}, %ymm22, %ymm25 + +// CHECK: vscalefps -516(%rdx){1to8}, %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x62,0x4d,0x30,0x2c,0x8a,0xfc,0xfd,0xff,0xff] + vscalefps -516(%rdx){1to8}, %ymm22, %ymm25