1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-20 03:23:01 +02:00

AVX512: Implemented encoding and intrinsics for vpternlogd/q.

Differential Revision: http://reviews.llvm.org/D13768

llvm-svn: 250396
This commit is contained in:
Igor Breger 2015-10-15 12:33:24 +00:00
parent 50e9acac80
commit 6e29702ee8
10 changed files with 751 additions and 4 deletions

View File

@ -7048,6 +7048,82 @@ let TargetPrefix = "x86" in {
[llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
}
// Bitwise ternary logic
// Masked ("mask") and zero-masked ("maskz") vpternlogd/vpternlogq intrinsics
// for 128-, 256- and 512-bit integer vectors. Every intrinsic takes three
// vector operands of the result type, an i32 operand, and a scalar integer
// mask as the last operand (i16 for the 16 x i32 form, i8 for every other
// form). All of them are declared IntrNoMem.
let TargetPrefix = "x86" in {
// Doubleword (32-bit element) forms.
def int_x86_avx512_mask_pternlog_d_128 :
GCCBuiltin<"__builtin_ia32_pternlogd128_mask">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_maskz_pternlog_d_128 :
GCCBuiltin<"__builtin_ia32_pternlogd128_maskz">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pternlog_d_256 :
GCCBuiltin<"__builtin_ia32_pternlogd256_mask">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_maskz_pternlog_d_256 :
GCCBuiltin<"__builtin_ia32_pternlogd256_maskz">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty,
llvm_i8_ty], [IntrNoMem]>;
// The 512-bit doubleword forms have 16 elements, hence the i16 mask.
def int_x86_avx512_mask_pternlog_d_512 :
GCCBuiltin<"__builtin_ia32_pternlogd512_mask">,
Intrinsic<[llvm_v16i32_ty],
[llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty,
llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_maskz_pternlog_d_512 :
GCCBuiltin<"__builtin_ia32_pternlogd512_maskz">,
Intrinsic<[llvm_v16i32_ty],
[llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty,
llvm_i16_ty], [IntrNoMem]>;
// Quadword (64-bit element) forms.
def int_x86_avx512_mask_pternlog_q_128 :
GCCBuiltin<"__builtin_ia32_pternlogq128_mask">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_maskz_pternlog_q_128 :
GCCBuiltin<"__builtin_ia32_pternlogq128_maskz">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pternlog_q_256 :
GCCBuiltin<"__builtin_ia32_pternlogq256_mask">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_maskz_pternlog_q_256 :
GCCBuiltin<"__builtin_ia32_pternlogq256_maskz">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pternlog_q_512 :
GCCBuiltin<"__builtin_ia32_pternlogq512_mask">,
Intrinsic<[llvm_v8i64_ty],
[llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_maskz_pternlog_q_512 :
GCCBuiltin<"__builtin_ia32_pternlogq512_maskz">,
Intrinsic<[llvm_v8i64_ty],
[llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty,
llvm_i8_ty], [IntrNoMem]>;
}
// Misc.
let TargetPrefix = "x86" in {
def int_x86_avx512_mask_cmp_ps_512 :

View File

@ -16252,6 +16252,23 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
Src1, Src2, Src3),
Mask, PassThru, Subtarget, DAG);
}
case TERLOG_OP_MASK:
case TERLOG_OP_MASKZ: {
  // Masked bitwise ternary logic. Operands 1-3 are the vector sources,
  // operand 4 is narrowed to i8 before it is handed to the target node, and
  // operand 5 is the write mask.
  EVT ResultVT = Op.getValueType();
  SDValue A = Op.getOperand(1);
  SDValue B = Op.getOperand(2);
  SDValue C = Op.getOperand(3);
  SDValue Imm8 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(4));
  SDValue WriteMask = Op.getOperand(5);
  // The MASK form passes the first source through; the MASKZ form passes a
  // zero vector instead.
  SDValue Fallback = IntrData->Type == TERLOG_OP_MASKZ
                         ? getZeroVector(ResultVT, Subtarget, DAG, dl)
                         : A;
  SDValue Ternlog = DAG.getNode(IntrData->Opc0, dl, ResultVT, A, B, C, Imm8);
  return getVectorMaskingNode(Ternlog, WriteMask, Fallback, Subtarget, DAG);
}
case FPCLASS: {
// FPclass intrinsics with mask
SDValue Src1 = Op.getOperand(1);
@ -19915,6 +19932,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VPERMV3: return "X86ISD::VPERMV3";
case X86ISD::VPERMIV3: return "X86ISD::VPERMIV3";
case X86ISD::VPERMI: return "X86ISD::VPERMI";
case X86ISD::VPTERNLOG: return "X86ISD::VPTERNLOG";
case X86ISD::VFIXUPIMM: return "X86ISD::VFIXUPIMM";
case X86ISD::VRANGE: return "X86ISD::VRANGE";
case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ";

View File

@ -389,9 +389,11 @@ namespace llvm {
VPERMIV3,
VPERMI,
VPERM2X128,
//Fix Up Special Packed Float32/64 values
// Bitwise ternary logic
VPTERNLOG,
// Fix Up Special Packed Float32/64 values
VFIXUPIMM,
//Range Restriction Calculation For Packed Pairs of Float32/64 values
// Range Restriction Calculation For Packed Pairs of Float32/64 values
VRANGE,
// Reduce - Perform Reduction Transformation on scalar\packed FP
VREDUCE,

View File

@ -7148,3 +7148,48 @@ multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
HasBWI>, EVEX_4V;
// Bitwise ternary logic instruction forms for a single vector type `_`.
//   opc       - opcode byte shared by all forms.
//   OpcodeStr - mnemonic ("vpternlogd" / "vpternlogq" at the use sites).
//   OpNode    - selection DAG node matched by the patterns.
// $src1 is tied to $dst. Three forms are produced: register (rri), full
// vector memory (rmi) and scalar-broadcast memory (rmbi). Each takes two
// operand strings after OpcodeStr; the second lists the same operands in the
// reverse order of the first (presumably the Intel-syntax spelling), so it
// must end with the immediate $src4 in every form.
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86VectorVTInfo _>{
  let Constraints = "$src1 = $dst" in {
  // Register third source.
  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT _.RC:$src3),
                              (i8 imm:$src4))>, AVX512AIi8Base, EVEX_4V;
  let mayLoad = 1 in {
    // Third source loaded as a full vector from memory.
    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT (bitconvert (_.LdFrag addr:$src3))),
                              (i8 imm:$src4))>,
                      AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
    // Third source is a scalar load broadcast to every element (EVEX_B).
    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                      "$src2, ${src3}"##_.BroadcastStr##", $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
                              (i8 imm:$src4))>, EVEX_B,
                      AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
  }
  }// Constraints = "$src1 = $dst"
}
// Instantiates avx512_ternlog (opcode 0x25, node X86vpternlog) for the three
// vector widths described by `_`: the 512-bit form (Z) under HasAVX512, and
// the 128-bit (Z128) and 256-bit (Z256) forms under HasAVX512 plus HasVLX.
multiclass avx512_common_ternlog<string OpcodeStr, AVX512VLVectorVTInfo _>{
let Predicates = [HasAVX512] in
defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info128>, EVEX_V128;
defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info256>, EVEX_V256;
}
}
// Doubleword and quadword instantiations; the quadword one additionally
// carries VEX_W.
defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", avx512vl_i64_info>, VEX_W;

View File

@ -293,6 +293,10 @@ def SDTVBroadcastm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>]>;
def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>;
// Type profile for the ternary logic node: one result and four operands. The
// result is a vector, operands 1-3 have the same type as the result, and
// operand 4 is an integer.
def SDTTernlog : SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisSameAs<0,3>,
SDTCisInt<4>]>;
def SDTFPBinOpRound : SDTypeProfile<1, 3, [ // fadd_round, fmul_round, etc.
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>, SDTCisInt<3>]>;
@ -353,6 +357,7 @@ def X86VPermv : SDNode<"X86ISD::VPERMV", SDTShuff2Op>;
def X86VPermi : SDNode<"X86ISD::VPERMI", SDTShuff2OpI>;
def X86VPermv3 : SDNode<"X86ISD::VPERMV3", SDTShuff3Op>;
def X86VPermiv3 : SDNode<"X86ISD::VPERMIV3", SDTShuff3Op>;
// Bitwise ternary logic node (X86ISD::VPTERNLOG) using the SDTTernlog profile.
def X86vpternlog : SDNode<"X86ISD::VPTERNLOG", SDTTernlog>;
def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;

View File

@ -29,7 +29,8 @@ enum IntrinsicType {
INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_RM,
COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
EXPAND_FROM_MEM, BLEND, INSERT_SUBVEC
EXPAND_FROM_MEM, BLEND, INSERT_SUBVEC,
TERLOG_OP_MASK, TERLOG_OP_MASKZ
};
struct IntrinsicData {
@ -1145,6 +1146,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_psubus_w_128, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx512_mask_psubus_w_256, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx512_mask_psubus_w_512, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pternlog_d_128, TERLOG_OP_MASK,
X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_mask_pternlog_d_256, TERLOG_OP_MASK,
X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_mask_pternlog_d_512, TERLOG_OP_MASK,
X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_mask_pternlog_q_128, TERLOG_OP_MASK,
X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_mask_pternlog_q_256, TERLOG_OP_MASK,
X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_mask_pternlog_q_512, TERLOG_OP_MASK,
X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_mask_punpckhb_w_128, INTR_TYPE_2OP_MASK,
X86ISD::UNPCKH, 0),
X86_INTRINSIC_DATA(avx512_mask_punpckhb_w_256, INTR_TYPE_2OP_MASK,
@ -1489,7 +1502,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_xor_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_maskz_pternlog_d_128, TERLOG_OP_MASKZ,
X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_maskz_pternlog_d_256, TERLOG_OP_MASKZ,
X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_maskz_pternlog_d_512, TERLOG_OP_MASKZ,
X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_maskz_pternlog_q_128, TERLOG_OP_MASKZ,
X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_maskz_pternlog_q_256, TERLOG_OP_MASKZ,
X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_maskz_pternlog_q_512, TERLOG_OP_MASKZ,
X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_128, FMA_OP_MASKZ, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_256, FMA_OP_MASKZ, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_512, FMA_OP_MASKZ, X86ISD::FMADD,

View File

@ -4541,3 +4541,74 @@ define <4 x float>@test_int_x86_avx512_mask_cvt_sd2ss_round(<2 x double> %x0,<2
%res2 = fadd <4 x float> %res, %res1
ret <4 x float> %res2
}
; Merge-masked pternlog.d on <16 x i32>. The intrinsic is called twice with
; immediate 33: once with the i16 mask argument %x4 and once with an all-ones
; mask (i16 -1); the two results are added and returned. The CHECK lines expect
; a {%k1}-masked vpternlogd followed by an unmasked vpternlogd.
declare <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
define <16 x i32>@test_int_x86_avx512_mask_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
%res2 = add <16 x i32> %res, %res1
ret <16 x i32> %res2
}
; Zero-masked pternlog.d on <16 x i32>. The intrinsic is called twice with
; immediate 33: once with the i16 mask argument %x4 and once with an all-ones
; mask (i16 -1); the two results are added and returned. The CHECK lines expect
; a {%k1} {z} vpternlogd followed by an unmasked vpternlogd.
declare <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
define <16 x i32>@test_int_x86_avx512_maskz_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
%res1 = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
%res2 = add <16 x i32> %res, %res1
ret <16 x i32> %res2
}
; Merge-masked pternlog.q on <8 x i64>. The intrinsic is called twice with
; immediate 33: once with the i8 mask argument %x4 and once with an all-ones
; mask (i8 -1); the two results are added and returned. The CHECK lines expect
; a {%k1}-masked vpternlogq followed by an unmasked vpternlogq.
declare <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
define <8 x i64>@test_int_x86_avx512_mask_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_512:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
%res2 = add <8 x i64> %res, %res1
ret <8 x i64> %res2
}
; Zero-masked pternlog.q on <8 x i64>. The intrinsic is called twice with
; immediate 33: once with the i8 mask argument %x4 and once with an all-ones
; mask (i8 -1); the two results are added and returned. The CHECK lines expect
; a {%k1} {z} vpternlogq followed by an unmasked vpternlogq.
declare <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_512:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
%res1 = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
%res2 = add <8 x i64> %res, %res1
ret <8 x i64> %res2
}

View File

@ -5022,3 +5022,147 @@ define <8 x i32>@test_int_x86_avx512_mask_inserti32x4_256(<8 x i32> %x0, <4 x i3
%res4 = add <8 x i32> %res2, %res3
ret <8 x i32> %res4
}
; Merge-masked pternlog.d on <4 x i32>. The intrinsic is called twice with
; immediate 33: once with the i8 mask argument %x4 and once with an all-ones
; mask (i8 -1); the two results are added and returned. The CHECK lines expect
; a {%k1}-masked xmm vpternlogd followed by an unmasked one.
declare <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8)
define <4 x i32>@test_int_x86_avx512_mask_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 {%k1}
; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm3, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}
; Zero-masked pternlog.d on <4 x i32>. The intrinsic is called twice with
; immediate 33: once with the i8 mask argument %x4 and once with an all-ones
; mask (i8 -1); the two results are added and returned. The CHECK lines expect
; a {%k1} {z} xmm vpternlogd followed by an unmasked one.
declare <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8)
define <4 x i32>@test_int_x86_avx512_maskz_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 {%k1} {z}
; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm3, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4)
%res1 = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}
; Merge-masked pternlog.d on <8 x i32>. The intrinsic is called twice with
; immediate 33: once with the i8 mask argument %x4 and once with an all-ones
; mask (i8 -1); the two results are added and returned. The CHECK lines expect
; a {%k1}-masked ymm vpternlogd followed by an unmasked one.
declare <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8)
define <8 x i32>@test_int_x86_avx512_mask_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 {%k1}
; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0
; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1)
%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2
}
; Zero-masked pternlog.d on <8 x i32>. The intrinsic is called twice with
; immediate 33: once with the i8 mask argument %x4 and once with an all-ones
; mask (i8 -1); the two results are added and returned. The CHECK lines expect
; a {%k1} {z} ymm vpternlogd followed by an unmasked one.
declare <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8)
define <8 x i32>@test_int_x86_avx512_maskz_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 {%k1} {z}
; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0
; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4)
%res1 = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1)
%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2
}
; Merge-masked pternlog.q on <2 x i64>. The intrinsic is called twice with
; immediate 33: once with the i8 mask argument %x4 and once with an all-ones
; mask (i8 -1); the two results are added and returned. The CHECK lines expect
; a {%k1}-masked xmm vpternlogq followed by an unmasked one.
declare <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8)
define <2 x i64>@test_int_x86_avx512_mask_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 {%k1}
; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0
; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1)
%res2 = add <2 x i64> %res, %res1
ret <2 x i64> %res2
}
; Zero-masked pternlog.q on <2 x i64>. The intrinsic is called twice with
; immediate 33: once with the i8 mask argument %x4 and once with an all-ones
; mask (i8 -1); the two results are added and returned. The CHECK lines expect
; a {%k1} {z} xmm vpternlogq followed by an unmasked one.
declare <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8)
define <2 x i64>@test_int_x86_avx512_maskz_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 {%k1} {z}
; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0
; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
%res1 = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1)
%res2 = add <2 x i64> %res, %res1
ret <2 x i64> %res2
}
; Merge-masked pternlog.q on <4 x i64>. The intrinsic is called twice with
; immediate 33: once with the i8 mask argument %x4 and once with an all-ones
; mask (i8 -1); the two results are added and returned. The CHECK lines expect
; a {%k1}-masked ymm vpternlogq followed by an unmasked one.
declare <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8)
define <4 x i64>@test_int_x86_avx512_mask_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 {%k1}
; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0
; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0
; CHECK-NEXT: retq
%res = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
%res2 = add <4 x i64> %res, %res1
ret <4 x i64> %res2
}
; Zero-masked pternlog.q on <4 x i64>. The intrinsic is called twice with
; immediate 33: once with the i8 mask argument %x4 and once with an all-ones
; mask (i8 -1); the two results are added and returned. The CHECK lines expect
; a {%k1} {z} ymm vpternlogq followed by an unmasked one.
declare <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8)
define <4 x i64>@test_int_x86_avx512_maskz_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 {%k1} {z}
; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0
; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0
; CHECK-NEXT: retq
%res = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
%res1 = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
%res2 = add <4 x i64> %res, %res1
ret <4 x i64> %res2
}

View File

@ -17612,3 +17612,124 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: vrcp14ss -516(%rdx), %xmm8, %xmm8
// CHECK: encoding: [0x62,0x72,0x3d,0x08,0x4d,0x82,0xfc,0xfd,0xff,0xff]
vrcp14ss -516(%rdx), %xmm8, %xmm8
// vpternlogd, zmm forms. Covers the register form unmasked, with {%k7} and
// with {%k7} {z}; a second immediate; full-vector memory operands; and
// {1to16} broadcast memory operands. The memory cases pair displacements that
// encode in a single byte (8128, -8192, 508, -512) with neighbouring values
// that need a 4-byte displacement (8192, -8256, 512, -516).
// CHECK: vpternlogd $171, %zmm20, %zmm14, %zmm12
// CHECK: encoding: [0x62,0x33,0x0d,0x48,0x25,0xe4,0xab]
vpternlogd $0xab, %zmm20, %zmm14, %zmm12
// CHECK: vpternlogd $171, %zmm20, %zmm14, %zmm12 {%k7}
// CHECK: encoding: [0x62,0x33,0x0d,0x4f,0x25,0xe4,0xab]
vpternlogd $0xab, %zmm20, %zmm14, %zmm12 {%k7}
// CHECK: vpternlogd $171, %zmm20, %zmm14, %zmm12 {%k7} {z}
// CHECK: encoding: [0x62,0x33,0x0d,0xcf,0x25,0xe4,0xab]
vpternlogd $0xab, %zmm20, %zmm14, %zmm12 {%k7} {z}
// CHECK: vpternlogd $123, %zmm20, %zmm14, %zmm12
// CHECK: encoding: [0x62,0x33,0x0d,0x48,0x25,0xe4,0x7b]
vpternlogd $0x7b, %zmm20, %zmm14, %zmm12
// CHECK: vpternlogd $123, (%rcx), %zmm14, %zmm12
// CHECK: encoding: [0x62,0x73,0x0d,0x48,0x25,0x21,0x7b]
vpternlogd $0x7b, (%rcx), %zmm14, %zmm12
// CHECK: vpternlogd $123, 291(%rax,%r14,8), %zmm14, %zmm12
// CHECK: encoding: [0x62,0x33,0x0d,0x48,0x25,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
vpternlogd $0x7b, 291(%rax,%r14,8), %zmm14, %zmm12
// CHECK: vpternlogd $123, (%rcx){1to16}, %zmm14, %zmm12
// CHECK: encoding: [0x62,0x73,0x0d,0x58,0x25,0x21,0x7b]
vpternlogd $0x7b, (%rcx){1to16}, %zmm14, %zmm12
// CHECK: vpternlogd $123, 8128(%rdx), %zmm14, %zmm12
// CHECK: encoding: [0x62,0x73,0x0d,0x48,0x25,0x62,0x7f,0x7b]
vpternlogd $0x7b, 8128(%rdx), %zmm14, %zmm12
// CHECK: vpternlogd $123, 8192(%rdx), %zmm14, %zmm12
// CHECK: encoding: [0x62,0x73,0x0d,0x48,0x25,0xa2,0x00,0x20,0x00,0x00,0x7b]
vpternlogd $0x7b, 8192(%rdx), %zmm14, %zmm12
// CHECK: vpternlogd $123, -8192(%rdx), %zmm14, %zmm12
// CHECK: encoding: [0x62,0x73,0x0d,0x48,0x25,0x62,0x80,0x7b]
vpternlogd $0x7b, -8192(%rdx), %zmm14, %zmm12
// CHECK: vpternlogd $123, -8256(%rdx), %zmm14, %zmm12
// CHECK: encoding: [0x62,0x73,0x0d,0x48,0x25,0xa2,0xc0,0xdf,0xff,0xff,0x7b]
vpternlogd $0x7b, -8256(%rdx), %zmm14, %zmm12
// CHECK: vpternlogd $123, 508(%rdx){1to16}, %zmm14, %zmm12
// CHECK: encoding: [0x62,0x73,0x0d,0x58,0x25,0x62,0x7f,0x7b]
vpternlogd $0x7b, 508(%rdx){1to16}, %zmm14, %zmm12
// CHECK: vpternlogd $123, 512(%rdx){1to16}, %zmm14, %zmm12
// CHECK: encoding: [0x62,0x73,0x0d,0x58,0x25,0xa2,0x00,0x02,0x00,0x00,0x7b]
vpternlogd $0x7b, 512(%rdx){1to16}, %zmm14, %zmm12
// CHECK: vpternlogd $123, -512(%rdx){1to16}, %zmm14, %zmm12
// CHECK: encoding: [0x62,0x73,0x0d,0x58,0x25,0x62,0x80,0x7b]
vpternlogd $0x7b, -512(%rdx){1to16}, %zmm14, %zmm12
// CHECK: vpternlogd $123, -516(%rdx){1to16}, %zmm14, %zmm12
// CHECK: encoding: [0x62,0x73,0x0d,0x58,0x25,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
vpternlogd $0x7b, -516(%rdx){1to16}, %zmm14, %zmm12
// vpternlogq, zmm forms. Covers the register form unmasked, with {%k3} and
// with {%k3} {z}; a second immediate; full-vector memory operands; and
// {1to8} broadcast memory operands. The memory cases pair displacements that
// encode in a single byte (8128, -8192, 1016, -1024) with neighbouring values
// that need a 4-byte displacement (8192, -8256, 1024, -1032).
// CHECK: vpternlogq $171, %zmm21, %zmm2, %zmm15
// CHECK: encoding: [0x62,0x33,0xed,0x48,0x25,0xfd,0xab]
vpternlogq $0xab, %zmm21, %zmm2, %zmm15
// CHECK: vpternlogq $171, %zmm21, %zmm2, %zmm15 {%k3}
// CHECK: encoding: [0x62,0x33,0xed,0x4b,0x25,0xfd,0xab]
vpternlogq $0xab, %zmm21, %zmm2, %zmm15 {%k3}
// CHECK: vpternlogq $171, %zmm21, %zmm2, %zmm15 {%k3} {z}
// CHECK: encoding: [0x62,0x33,0xed,0xcb,0x25,0xfd,0xab]
vpternlogq $0xab, %zmm21, %zmm2, %zmm15 {%k3} {z}
// CHECK: vpternlogq $123, %zmm21, %zmm2, %zmm15
// CHECK: encoding: [0x62,0x33,0xed,0x48,0x25,0xfd,0x7b]
vpternlogq $0x7b, %zmm21, %zmm2, %zmm15
// CHECK: vpternlogq $123, (%rcx), %zmm2, %zmm15
// CHECK: encoding: [0x62,0x73,0xed,0x48,0x25,0x39,0x7b]
vpternlogq $0x7b, (%rcx), %zmm2, %zmm15
// CHECK: vpternlogq $123, 291(%rax,%r14,8), %zmm2, %zmm15
// CHECK: encoding: [0x62,0x33,0xed,0x48,0x25,0xbc,0xf0,0x23,0x01,0x00,0x00,0x7b]
vpternlogq $0x7b, 291(%rax,%r14,8), %zmm2, %zmm15
// CHECK: vpternlogq $123, (%rcx){1to8}, %zmm2, %zmm15
// CHECK: encoding: [0x62,0x73,0xed,0x58,0x25,0x39,0x7b]
vpternlogq $0x7b, (%rcx){1to8}, %zmm2, %zmm15
// CHECK: vpternlogq $123, 8128(%rdx), %zmm2, %zmm15
// CHECK: encoding: [0x62,0x73,0xed,0x48,0x25,0x7a,0x7f,0x7b]
vpternlogq $0x7b, 8128(%rdx), %zmm2, %zmm15
// CHECK: vpternlogq $123, 8192(%rdx), %zmm2, %zmm15
// CHECK: encoding: [0x62,0x73,0xed,0x48,0x25,0xba,0x00,0x20,0x00,0x00,0x7b]
vpternlogq $0x7b, 8192(%rdx), %zmm2, %zmm15
// CHECK: vpternlogq $123, -8192(%rdx), %zmm2, %zmm15
// CHECK: encoding: [0x62,0x73,0xed,0x48,0x25,0x7a,0x80,0x7b]
vpternlogq $0x7b, -8192(%rdx), %zmm2, %zmm15
// CHECK: vpternlogq $123, -8256(%rdx), %zmm2, %zmm15
// CHECK: encoding: [0x62,0x73,0xed,0x48,0x25,0xba,0xc0,0xdf,0xff,0xff,0x7b]
vpternlogq $0x7b, -8256(%rdx), %zmm2, %zmm15
// CHECK: vpternlogq $123, 1016(%rdx){1to8}, %zmm2, %zmm15
// CHECK: encoding: [0x62,0x73,0xed,0x58,0x25,0x7a,0x7f,0x7b]
vpternlogq $0x7b, 1016(%rdx){1to8}, %zmm2, %zmm15
// CHECK: vpternlogq $123, 1024(%rdx){1to8}, %zmm2, %zmm15
// CHECK: encoding: [0x62,0x73,0xed,0x58,0x25,0xba,0x00,0x04,0x00,0x00,0x7b]
vpternlogq $0x7b, 1024(%rdx){1to8}, %zmm2, %zmm15
// CHECK: vpternlogq $123, -1024(%rdx){1to8}, %zmm2, %zmm15
// CHECK: encoding: [0x62,0x73,0xed,0x58,0x25,0x7a,0x80,0x7b]
vpternlogq $0x7b, -1024(%rdx){1to8}, %zmm2, %zmm15
// CHECK: vpternlogq $123, -1032(%rdx){1to8}, %zmm2, %zmm15
// CHECK: encoding: [0x62,0x73,0xed,0x58,0x25,0xba,0xf8,0xfb,0xff,0xff,0x7b]
vpternlogq $0x7b, -1032(%rdx){1to8}, %zmm2, %zmm15

View File

@ -21322,3 +21322,244 @@ vaddpd {rz-sae}, %zmm2, %zmm1, %zmm1
// CHECK: vcvttpd2udq -1032(%rdx){1to4}, %xmm28
// CHECK: encoding: [0x62,0x61,0xfc,0x38,0x78,0xa2,0xf8,0xfb,0xff,0xff]
vcvttpd2udq -1032(%rdx){1to4}, %xmm28
// vpternlogd, xmm forms. Covers the register form unmasked, with {%k7} and
// with {%k7} {z}; a second immediate; full-vector memory operands; and
// {1to4} broadcast memory operands. The memory cases pair displacements that
// encode in a single byte (2032, -2048, 508, -512) with neighbouring values
// that need a 4-byte displacement (2048, -2064, 512, -516).
// CHECK: vpternlogd $171, %xmm25, %xmm19, %xmm27
// CHECK: encoding: [0x62,0x03,0x65,0x00,0x25,0xd9,0xab]
vpternlogd $0xab, %xmm25, %xmm19, %xmm27
// CHECK: vpternlogd $171, %xmm25, %xmm19, %xmm27 {%k7}
// CHECK: encoding: [0x62,0x03,0x65,0x07,0x25,0xd9,0xab]
vpternlogd $0xab, %xmm25, %xmm19, %xmm27 {%k7}
// CHECK: vpternlogd $171, %xmm25, %xmm19, %xmm27 {%k7} {z}
// CHECK: encoding: [0x62,0x03,0x65,0x87,0x25,0xd9,0xab]
vpternlogd $0xab, %xmm25, %xmm19, %xmm27 {%k7} {z}
// CHECK: vpternlogd $123, %xmm25, %xmm19, %xmm27
// CHECK: encoding: [0x62,0x03,0x65,0x00,0x25,0xd9,0x7b]
vpternlogd $0x7b, %xmm25, %xmm19, %xmm27
// CHECK: vpternlogd $123, (%rcx), %xmm19, %xmm27
// CHECK: encoding: [0x62,0x63,0x65,0x00,0x25,0x19,0x7b]
vpternlogd $0x7b, (%rcx), %xmm19, %xmm27
// CHECK: vpternlogd $123, 291(%rax,%r14,8), %xmm19, %xmm27
// CHECK: encoding: [0x62,0x23,0x65,0x00,0x25,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
vpternlogd $0x7b, 291(%rax,%r14,8), %xmm19, %xmm27
// CHECK: vpternlogd $123, (%rcx){1to4}, %xmm19, %xmm27
// CHECK: encoding: [0x62,0x63,0x65,0x10,0x25,0x19,0x7b]
vpternlogd $0x7b, (%rcx){1to4}, %xmm19, %xmm27
// CHECK: vpternlogd $123, 2032(%rdx), %xmm19, %xmm27
// CHECK: encoding: [0x62,0x63,0x65,0x00,0x25,0x5a,0x7f,0x7b]
vpternlogd $0x7b, 2032(%rdx), %xmm19, %xmm27
// CHECK: vpternlogd $123, 2048(%rdx), %xmm19, %xmm27
// CHECK: encoding: [0x62,0x63,0x65,0x00,0x25,0x9a,0x00,0x08,0x00,0x00,0x7b]
vpternlogd $0x7b, 2048(%rdx), %xmm19, %xmm27
// CHECK: vpternlogd $123, -2048(%rdx), %xmm19, %xmm27
// CHECK: encoding: [0x62,0x63,0x65,0x00,0x25,0x5a,0x80,0x7b]
vpternlogd $0x7b, -2048(%rdx), %xmm19, %xmm27
// CHECK: vpternlogd $123, -2064(%rdx), %xmm19, %xmm27
// CHECK: encoding: [0x62,0x63,0x65,0x00,0x25,0x9a,0xf0,0xf7,0xff,0xff,0x7b]
vpternlogd $0x7b, -2064(%rdx), %xmm19, %xmm27
// CHECK: vpternlogd $123, 508(%rdx){1to4}, %xmm19, %xmm27
// CHECK: encoding: [0x62,0x63,0x65,0x10,0x25,0x5a,0x7f,0x7b]
vpternlogd $0x7b, 508(%rdx){1to4}, %xmm19, %xmm27
// CHECK: vpternlogd $123, 512(%rdx){1to4}, %xmm19, %xmm27
// CHECK: encoding: [0x62,0x63,0x65,0x10,0x25,0x9a,0x00,0x02,0x00,0x00,0x7b]
vpternlogd $0x7b, 512(%rdx){1to4}, %xmm19, %xmm27
// CHECK: vpternlogd $123, -512(%rdx){1to4}, %xmm19, %xmm27
// CHECK: encoding: [0x62,0x63,0x65,0x10,0x25,0x5a,0x80,0x7b]
vpternlogd $0x7b, -512(%rdx){1to4}, %xmm19, %xmm27
// CHECK: vpternlogd $123, -516(%rdx){1to4}, %xmm19, %xmm27
// CHECK: encoding: [0x62,0x63,0x65,0x10,0x25,0x9a,0xfc,0xfd,0xff,0xff,0x7b]
vpternlogd $0x7b, -516(%rdx){1to4}, %xmm19, %xmm27
// vpternlogd, ymm forms. Covers the register form unmasked, with {%k3} and
// with {%k3} {z}; a second immediate; full-vector memory operands; and
// {1to8} broadcast memory operands. The memory cases pair displacements that
// encode in a single byte (4064, -4096, 508, -512) with neighbouring values
// that need a 4-byte displacement (4096, -4128, 512, -516).
// CHECK: vpternlogd $171, %ymm20, %ymm17, %ymm29
// CHECK: encoding: [0x62,0x23,0x75,0x20,0x25,0xec,0xab]
vpternlogd $0xab, %ymm20, %ymm17, %ymm29
// CHECK: vpternlogd $171, %ymm20, %ymm17, %ymm29 {%k3}
// CHECK: encoding: [0x62,0x23,0x75,0x23,0x25,0xec,0xab]
vpternlogd $0xab, %ymm20, %ymm17, %ymm29 {%k3}
// CHECK: vpternlogd $171, %ymm20, %ymm17, %ymm29 {%k3} {z}
// CHECK: encoding: [0x62,0x23,0x75,0xa3,0x25,0xec,0xab]
vpternlogd $0xab, %ymm20, %ymm17, %ymm29 {%k3} {z}
// CHECK: vpternlogd $123, %ymm20, %ymm17, %ymm29
// CHECK: encoding: [0x62,0x23,0x75,0x20,0x25,0xec,0x7b]
vpternlogd $0x7b, %ymm20, %ymm17, %ymm29
// CHECK: vpternlogd $123, (%rcx), %ymm17, %ymm29
// CHECK: encoding: [0x62,0x63,0x75,0x20,0x25,0x29,0x7b]
vpternlogd $0x7b, (%rcx), %ymm17, %ymm29
// CHECK: vpternlogd $123, 291(%rax,%r14,8), %ymm17, %ymm29
// CHECK: encoding: [0x62,0x23,0x75,0x20,0x25,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
vpternlogd $0x7b, 291(%rax,%r14,8), %ymm17, %ymm29
// CHECK: vpternlogd $123, (%rcx){1to8}, %ymm17, %ymm29
// CHECK: encoding: [0x62,0x63,0x75,0x30,0x25,0x29,0x7b]
vpternlogd $0x7b, (%rcx){1to8}, %ymm17, %ymm29
// CHECK: vpternlogd $123, 4064(%rdx), %ymm17, %ymm29
// CHECK: encoding: [0x62,0x63,0x75,0x20,0x25,0x6a,0x7f,0x7b]
vpternlogd $0x7b, 4064(%rdx), %ymm17, %ymm29
// CHECK: vpternlogd $123, 4096(%rdx), %ymm17, %ymm29
// CHECK: encoding: [0x62,0x63,0x75,0x20,0x25,0xaa,0x00,0x10,0x00,0x00,0x7b]
vpternlogd $0x7b, 4096(%rdx), %ymm17, %ymm29
// CHECK: vpternlogd $123, -4096(%rdx), %ymm17, %ymm29
// CHECK: encoding: [0x62,0x63,0x75,0x20,0x25,0x6a,0x80,0x7b]
vpternlogd $0x7b, -4096(%rdx), %ymm17, %ymm29
// CHECK: vpternlogd $123, -4128(%rdx), %ymm17, %ymm29
// CHECK: encoding: [0x62,0x63,0x75,0x20,0x25,0xaa,0xe0,0xef,0xff,0xff,0x7b]
vpternlogd $0x7b, -4128(%rdx), %ymm17, %ymm29
// CHECK: vpternlogd $123, 508(%rdx){1to8}, %ymm17, %ymm29
// CHECK: encoding: [0x62,0x63,0x75,0x30,0x25,0x6a,0x7f,0x7b]
vpternlogd $0x7b, 508(%rdx){1to8}, %ymm17, %ymm29
// CHECK: vpternlogd $123, 512(%rdx){1to8}, %ymm17, %ymm29
// CHECK: encoding: [0x62,0x63,0x75,0x30,0x25,0xaa,0x00,0x02,0x00,0x00,0x7b]
vpternlogd $0x7b, 512(%rdx){1to8}, %ymm17, %ymm29
// CHECK: vpternlogd $123, -512(%rdx){1to8}, %ymm17, %ymm29
// CHECK: encoding: [0x62,0x63,0x75,0x30,0x25,0x6a,0x80,0x7b]
vpternlogd $0x7b, -512(%rdx){1to8}, %ymm17, %ymm29
// CHECK: vpternlogd $123, -516(%rdx){1to8}, %ymm17, %ymm29
// CHECK: encoding: [0x62,0x63,0x75,0x30,0x25,0xaa,0xfc,0xfd,0xff,0xff,0x7b]
vpternlogd $0x7b, -516(%rdx){1to8}, %ymm17, %ymm29
// vpternlogq, xmm forms. Covers the register form unmasked, with {%k1} and
// with {%k1} {z}; a second immediate; full-vector memory operands; and
// {1to2} broadcast memory operands. The memory cases pair displacements that
// encode in a single byte (2032, -2048, 1016, -1024) with neighbouring values
// that need a 4-byte displacement (2048, -2064, 1024, -1032).
// CHECK: vpternlogq $171, %xmm22, %xmm25, %xmm17
// CHECK: encoding: [0x62,0xa3,0xb5,0x00,0x25,0xce,0xab]
vpternlogq $0xab, %xmm22, %xmm25, %xmm17
// CHECK: vpternlogq $171, %xmm22, %xmm25, %xmm17 {%k1}
// CHECK: encoding: [0x62,0xa3,0xb5,0x01,0x25,0xce,0xab]
vpternlogq $0xab, %xmm22, %xmm25, %xmm17 {%k1}
// CHECK: vpternlogq $171, %xmm22, %xmm25, %xmm17 {%k1} {z}
// CHECK: encoding: [0x62,0xa3,0xb5,0x81,0x25,0xce,0xab]
vpternlogq $0xab, %xmm22, %xmm25, %xmm17 {%k1} {z}
// CHECK: vpternlogq $123, %xmm22, %xmm25, %xmm17
// CHECK: encoding: [0x62,0xa3,0xb5,0x00,0x25,0xce,0x7b]
vpternlogq $0x7b, %xmm22, %xmm25, %xmm17
// CHECK: vpternlogq $123, (%rcx), %xmm25, %xmm17
// CHECK: encoding: [0x62,0xe3,0xb5,0x00,0x25,0x09,0x7b]
vpternlogq $0x7b, (%rcx), %xmm25, %xmm17
// CHECK: vpternlogq $123, 291(%rax,%r14,8), %xmm25, %xmm17
// CHECK: encoding: [0x62,0xa3,0xb5,0x00,0x25,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
vpternlogq $0x7b, 291(%rax,%r14,8), %xmm25, %xmm17
// CHECK: vpternlogq $123, (%rcx){1to2}, %xmm25, %xmm17
// CHECK: encoding: [0x62,0xe3,0xb5,0x10,0x25,0x09,0x7b]
vpternlogq $0x7b, (%rcx){1to2}, %xmm25, %xmm17
// CHECK: vpternlogq $123, 2032(%rdx), %xmm25, %xmm17
// CHECK: encoding: [0x62,0xe3,0xb5,0x00,0x25,0x4a,0x7f,0x7b]
vpternlogq $0x7b, 2032(%rdx), %xmm25, %xmm17
// CHECK: vpternlogq $123, 2048(%rdx), %xmm25, %xmm17
// CHECK: encoding: [0x62,0xe3,0xb5,0x00,0x25,0x8a,0x00,0x08,0x00,0x00,0x7b]
vpternlogq $0x7b, 2048(%rdx), %xmm25, %xmm17
// CHECK: vpternlogq $123, -2048(%rdx), %xmm25, %xmm17
// CHECK: encoding: [0x62,0xe3,0xb5,0x00,0x25,0x4a,0x80,0x7b]
vpternlogq $0x7b, -2048(%rdx), %xmm25, %xmm17
// CHECK: vpternlogq $123, -2064(%rdx), %xmm25, %xmm17
// CHECK: encoding: [0x62,0xe3,0xb5,0x00,0x25,0x8a,0xf0,0xf7,0xff,0xff,0x7b]
vpternlogq $0x7b, -2064(%rdx), %xmm25, %xmm17
// CHECK: vpternlogq $123, 1016(%rdx){1to2}, %xmm25, %xmm17
// CHECK: encoding: [0x62,0xe3,0xb5,0x10,0x25,0x4a,0x7f,0x7b]
vpternlogq $0x7b, 1016(%rdx){1to2}, %xmm25, %xmm17
// CHECK: vpternlogq $123, 1024(%rdx){1to2}, %xmm25, %xmm17
// CHECK: encoding: [0x62,0xe3,0xb5,0x10,0x25,0x8a,0x00,0x04,0x00,0x00,0x7b]
vpternlogq $0x7b, 1024(%rdx){1to2}, %xmm25, %xmm17
// CHECK: vpternlogq $123, -1024(%rdx){1to2}, %xmm25, %xmm17
// CHECK: encoding: [0x62,0xe3,0xb5,0x10,0x25,0x4a,0x80,0x7b]
vpternlogq $0x7b, -1024(%rdx){1to2}, %xmm25, %xmm17
// CHECK: vpternlogq $123, -1032(%rdx){1to2}, %xmm25, %xmm17
// CHECK: encoding: [0x62,0xe3,0xb5,0x10,0x25,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
vpternlogq $0x7b, -1032(%rdx){1to2}, %xmm25, %xmm17
// vpternlogq with ymm operands.
// Each case is an expected-disassembly line, an expected-encoding line, and
// the instruction to assemble. Hex immediates in the input are expected to
// print in decimal (0xab -> $171, 0x7b -> $123).
//
// Register source with immediate 0xab: unmasked, masked with {%k6}, and
// masked with {%k6} {z}. Only the fourth encoding byte differs between the
// three (0x20, 0x26, 0xa6).
// CHECK: vpternlogq $171, %ymm25, %ymm23, %ymm26
// CHECK: encoding: [0x62,0x03,0xc5,0x20,0x25,0xd1,0xab]
vpternlogq $0xab, %ymm25, %ymm23, %ymm26
// CHECK: vpternlogq $171, %ymm25, %ymm23, %ymm26 {%k6}
// CHECK: encoding: [0x62,0x03,0xc5,0x26,0x25,0xd1,0xab]
vpternlogq $0xab, %ymm25, %ymm23, %ymm26 {%k6}
// CHECK: vpternlogq $171, %ymm25, %ymm23, %ymm26 {%k6} {z}
// CHECK: encoding: [0x62,0x03,0xc5,0xa6,0x25,0xd1,0xab]
vpternlogq $0xab, %ymm25, %ymm23, %ymm26 {%k6} {z}
// Same register operands with a second immediate value (0x7b).
// CHECK: vpternlogq $123, %ymm25, %ymm23, %ymm26
// CHECK: encoding: [0x62,0x03,0xc5,0x20,0x25,0xd1,0x7b]
vpternlogq $0x7b, %ymm25, %ymm23, %ymm26
// Memory source: base register only, base + index*8 + displacement, and a
// {1to4} broadcast load.
// CHECK: vpternlogq $123, (%rcx), %ymm23, %ymm26
// CHECK: encoding: [0x62,0x63,0xc5,0x20,0x25,0x11,0x7b]
vpternlogq $0x7b, (%rcx), %ymm23, %ymm26
// CHECK: vpternlogq $123, 291(%rax,%r14,8), %ymm23, %ymm26
// CHECK: encoding: [0x62,0x23,0xc5,0x20,0x25,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
vpternlogq $0x7b, 291(%rax,%r14,8), %ymm23, %ymm26
// CHECK: vpternlogq $123, (%rcx){1to4}, %ymm23, %ymm26
// CHECK: encoding: [0x62,0x63,0xc5,0x30,0x25,0x11,0x7b]
vpternlogq $0x7b, (%rcx){1to4}, %ymm23, %ymm26
// Displacement-size boundaries, full-vector memory source: 4064 and -4096
// are expected as a single displacement byte (0x7f and 0x80); 4096 and -4128
// as four displacement bytes.
// CHECK: vpternlogq $123, 4064(%rdx), %ymm23, %ymm26
// CHECK: encoding: [0x62,0x63,0xc5,0x20,0x25,0x52,0x7f,0x7b]
vpternlogq $0x7b, 4064(%rdx), %ymm23, %ymm26
// CHECK: vpternlogq $123, 4096(%rdx), %ymm23, %ymm26
// CHECK: encoding: [0x62,0x63,0xc5,0x20,0x25,0x92,0x00,0x10,0x00,0x00,0x7b]
vpternlogq $0x7b, 4096(%rdx), %ymm23, %ymm26
// CHECK: vpternlogq $123, -4096(%rdx), %ymm23, %ymm26
// CHECK: encoding: [0x62,0x63,0xc5,0x20,0x25,0x52,0x80,0x7b]
vpternlogq $0x7b, -4096(%rdx), %ymm23, %ymm26
// CHECK: vpternlogq $123, -4128(%rdx), %ymm23, %ymm26
// CHECK: encoding: [0x62,0x63,0xc5,0x20,0x25,0x92,0xe0,0xef,0xff,0xff,0x7b]
vpternlogq $0x7b, -4128(%rdx), %ymm23, %ymm26
// Displacement-size boundaries, {1to4} broadcast source: 1016 and -1024 are
// expected as a single displacement byte; 1024 and -1032 as four bytes.
// CHECK: vpternlogq $123, 1016(%rdx){1to4}, %ymm23, %ymm26
// CHECK: encoding: [0x62,0x63,0xc5,0x30,0x25,0x52,0x7f,0x7b]
vpternlogq $0x7b, 1016(%rdx){1to4}, %ymm23, %ymm26
// CHECK: vpternlogq $123, 1024(%rdx){1to4}, %ymm23, %ymm26
// CHECK: encoding: [0x62,0x63,0xc5,0x30,0x25,0x92,0x00,0x04,0x00,0x00,0x7b]
vpternlogq $0x7b, 1024(%rdx){1to4}, %ymm23, %ymm26
// CHECK: vpternlogq $123, -1024(%rdx){1to4}, %ymm23, %ymm26
// CHECK: encoding: [0x62,0x63,0xc5,0x30,0x25,0x52,0x80,0x7b]
vpternlogq $0x7b, -1024(%rdx){1to4}, %ymm23, %ymm26
// CHECK: vpternlogq $123, -1032(%rdx){1to4}, %ymm23, %ymm26
// CHECK: encoding: [0x62,0x63,0xc5,0x30,0x25,0x92,0xf8,0xfb,0xff,0xff,0x7b]
vpternlogq $0x7b, -1032(%rdx){1to4}, %ymm23, %ymm26