1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 02:52:53 +02:00

[X86] Convert all uses of WriteFLogic/WriteVecLogic to X86SchedWriteWidths.

In preparation of splitting WriteVecLogic by vector width.

llvm-svn: 331256
This commit is contained in:
Simon Pilgrim 2018-05-01 12:15:29 +00:00
parent 6f1ed8a0db
commit e780627a3e
2 changed files with 157 additions and 141 deletions

View File

@ -2830,7 +2830,8 @@ multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
sched, HasBWI>, VEX, PS, VEX_W;
}
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, WriteVecLogic>;
// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
let Predicates = [HasAVX512, NoDQI] in
@ -2876,12 +2877,13 @@ def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
defm KAND : avx512_mask_binop_all<0x41, "kand", and, WriteVecLogic, 1>;
defm KOR : avx512_mask_binop_all<0x45, "kor", or, WriteVecLogic, 1>;
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, WriteVecLogic, 1>;
defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, WriteVecLogic, 1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, WriteVecLogic, 0>;
defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, WriteVecLogic, 1, HasDQI>;
// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KAND : avx512_mask_binop_all<0x41, "kand", and, SchedWriteVecLogic.XMM, 1>;
defm KOR : avx512_mask_binop_all<0x45, "kor", or, SchedWriteVecLogic.XMM, 1>;
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SchedWriteVecLogic.XMM, 1>;
defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SchedWriteVecLogic.XMM, 1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SchedWriteVecLogic.XMM, 0>;
defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
Instruction Inst> {
@ -2960,8 +2962,9 @@ multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
VEX, PD, VEX_W;
}
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, WriteVecLogic>;
defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, WriteVecLogic, HasDQI>;
// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
// Mask shift
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
@ -3718,7 +3721,7 @@ let hasSideEffects = 0 in
def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
(ins VR128X:$src),
"vmovq.s\t{$src, $dst|$dst, $src}", []>,
EVEX, VEX_W, Sched<[WriteVecLogic]>;
EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
} // ExeDomain = SSEPackedInt
// Move Scalar Single to Double Int
@ -4149,7 +4152,7 @@ let Predicates = [HasAVX512] in {
(VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
}
let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in {
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
let AddedComplexity = 15 in
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
(ins VR128X:$src),
@ -4763,23 +4766,23 @@ multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr,
multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode,
SDNode OpNodeMsk, X86FoldableSchedWrite sched,
SDNode OpNodeMsk, X86SchedWriteWidths sched,
AVX512VLVectorVTInfo VTInfo,
bit IsCommutable = 0> {
let Predicates = [HasAVX512] in
defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched,
defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.ZMM,
VTInfo.info512, IsCommutable>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched,
defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.YMM,
VTInfo.info256, IsCommutable>, EVEX_V256;
defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched,
defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.XMM,
VTInfo.info128, IsCommutable>, EVEX_V128;
}
}
multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
SDNode OpNode, X86FoldableSchedWrite sched,
SDNode OpNode, X86SchedWriteWidths sched,
bit IsCommutable = 0> {
defm Q : avx512_logic_rmb_vl<opc_q, OpcodeStr#"q", OpNode, OpNode, sched,
avx512vl_i64_info, IsCommutable>,
@ -4789,10 +4792,14 @@ multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
EVEX_CD8<32, CD8VF>;
}
defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and, WriteVecLogic, 1>;
defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or, WriteVecLogic, 1>;
defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor, WriteVecLogic, 1>;
defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp, WriteVecLogic>;
defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and,
SchedWriteVecLogic, 1>;
defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or,
SchedWriteVecLogic, 1>;
defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
SchedWriteVecLogic, 1>;
defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
SchedWriteVecLogic>;
//===----------------------------------------------------------------------===//
// AVX-512 FP arithmetic
@ -5007,83 +5014,86 @@ multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
}
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
Predicate prd, X86FoldableSchedWrite sched,
X86FoldableSchedWrite schedY,
Predicate prd, X86SchedWriteWidths sched,
bit IsCommutable = 0> {
let Predicates = [prd] in {
defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
schedY, IsCommutable>, EVEX_V512, PS,
sched.ZMM, IsCommutable>, EVEX_V512, PS,
EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
schedY, IsCommutable>, EVEX_V512, PD, VEX_W,
sched.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
}
// Define only if AVX512VL feature is present.
let Predicates = [prd, HasVLX] in {
defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
sched, IsCommutable>, EVEX_V128, PS,
sched.XMM, IsCommutable>, EVEX_V128, PS,
EVEX_CD8<32, CD8VF>;
defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
schedY, IsCommutable>, EVEX_V256, PS,
sched.YMM, IsCommutable>, EVEX_V256, PS,
EVEX_CD8<32, CD8VF>;
defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
sched, IsCommutable>, EVEX_V128, PD, VEX_W,
sched.XMM, IsCommutable>, EVEX_V128, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
schedY, IsCommutable>, EVEX_V256, PD, VEX_W,
sched.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
}
}
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
X86FoldableSchedWrite sched> {
defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched, v16f32_info>,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched, v8f64_info>,
EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
X86SchedWriteWidths sched> {
defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
v16f32_info>,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
v8f64_info>,
EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
X86FoldableSchedWrite sched> {
defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched, v16f32_info>,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched, v8f64_info>,
EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
X86SchedWriteWidths sched> {
defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
v16f32_info>,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
v8f64_info>,
EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
WriteFAdd, WriteFAdd, 1>,
avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, WriteFAdd>;
SchedWriteFAdd, 1>,
avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAdd>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
WriteFMul, WriteFMul, 1>,
avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, WriteFMul>;
SchedWriteFMul, 1>,
avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMul>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512,
WriteFAdd, WriteFAdd>,
avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, WriteFAdd>;
SchedWriteFAdd>,
avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAdd>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512,
WriteFDiv, WriteFDiv>,
avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, WriteFDiv>;
SchedWriteFDiv>,
avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDiv>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
WriteFCmp, WriteFCmp, 0>,
avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, WriteFCmp>;
SchedWriteFCmp, 0>,
avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, SchedWriteFCmp>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
WriteFCmp, WriteFCmp, 0>,
avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, WriteFCmp>;
SchedWriteFCmp, 0>,
avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, SchedWriteFCmp>;
let isCodeGenOnly = 1 in {
defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
WriteFCmp, WriteFCmp, 1>;
SchedWriteFCmp, 1>;
defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
WriteFCmp, WriteFCmp, 1>;
SchedWriteFCmp, 1>;
}
defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
WriteFLogic, WriteFLogicY, 1>;
SchedWriteFLogic, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
WriteFLogic, WriteFLogicY, 0>;
SchedWriteFLogic, 0>;
defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
WriteFLogic, WriteFLogicY, 1>;
SchedWriteFLogic, 1>;
defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
WriteFLogic, WriteFLogicY, 1>;
SchedWriteFLogic, 1>;
// Patterns catch floating point selects with bitcasted integer logic ops.
multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
@ -5355,17 +5365,17 @@ multiclass avx512_vptest_lowering<PatFrag OpNode, X86VectorVTInfo ExtendInfo,
}
multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, PatFrag OpNode,
X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _,
X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
string Suffix> {
let Predicates = [HasAVX512] in
defm Z : avx512_vptest<opc, OpcodeStr, OpNode, sched, _.info512, Suffix>,
avx512_vptest_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
defm Z : avx512_vptest<opc, OpcodeStr, OpNode, sched.ZMM, _.info512, Suffix>,
avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, sched, _.info256, Suffix>,
avx512_vptest_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, sched, _.info128, Suffix>,
avx512_vptest_mb<opc, OpcodeStr, OpNode, sched, _.info128>, EVEX_V128;
defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, sched.YMM, _.info256, Suffix>,
avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, sched.XMM, _.info128, Suffix>,
avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
}
let Predicates = [HasAVX512, NoVLX] in {
defm Z256_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info256, Suffix>;
@ -5374,7 +5384,7 @@ multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, PatFrag OpNode,
}
multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, PatFrag OpNode,
X86FoldableSchedWrite sched> {
X86SchedWriteWidths sched> {
defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, sched,
avx512vl_i32_info, "D">;
defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode, sched,
@ -5382,23 +5392,23 @@ multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, PatFrag OpNode,
}
multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
PatFrag OpNode, X86FoldableSchedWrite sched> {
PatFrag OpNode, X86SchedWriteWidths sched> {
let Predicates = [HasBWI] in {
defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched, v32i16_info, "W">,
EVEX_V512, VEX_W;
defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched, v64i8_info, "B">,
EVEX_V512;
defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.ZMM,
v32i16_info, "W">, EVEX_V512, VEX_W;
defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.ZMM,
v64i8_info, "B">, EVEX_V512;
}
let Predicates = [HasVLX, HasBWI] in {
defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched, v16i16x_info, "W">,
EVEX_V256, VEX_W;
defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched, v8i16x_info, "W">,
EVEX_V128, VEX_W;
defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched, v32i8x_info, "B">,
EVEX_V256;
defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched, v16i8x_info, "B">,
EVEX_V128;
defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.YMM,
v16i16x_info, "W">, EVEX_V256, VEX_W;
defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.XMM,
v8i16x_info, "W">, EVEX_V128, VEX_W;
defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.YMM,
v32i8x_info, "B">, EVEX_V256;
defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.XMM,
v16i8x_info, "B">, EVEX_V128;
}
let Predicates = [HasAVX512, NoVLX] in {
@ -5418,19 +5428,19 @@ def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2),
(X86cmpm node:$src1, node:$src2, (i8 4))>;
multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
PatFrag OpNode, X86FoldableSchedWrite sched> :
avx512_vptest_wb <opc_wb, OpcodeStr, OpNode, sched>,
PatFrag OpNode, X86SchedWriteWidths sched> :
avx512_vptest_wb<opc_wb, OpcodeStr, OpNode, sched>,
avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, sched>;
defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86pcmpnem,
WriteVecLogic>, T8PD;
SchedWriteVecLogic>, T8PD;
defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86pcmpeqm,
WriteVecLogic>, T8XS;
SchedWriteVecLogic>, T8XS;
//===----------------------------------------------------------------------===//
// AVX-512 Shift instructions
//===----------------------------------------------------------------------===//
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
@ -9886,9 +9896,9 @@ def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$sr
//===----------------------------------------------------------------------===//
defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
WriteFShuffle, WriteFShuffle>;
SchedWriteFShuffle>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
WriteFShuffle, WriteFShuffle>;
SchedWriteFShuffle>;
defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
WriteShuffle, HasBWI>;

View File

@ -2296,32 +2296,33 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
ValueType OpVT128, ValueType OpVT256,
X86FoldableSchedWrite sched, bit IsCommutable,
X86SchedWriteWidths sched, bit IsCommutable,
Predicate prd> {
let Predicates = [HasAVX, prd] in
defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
VR128, loadv2i64, i128mem, sched, IsCommutable, 0>, VEX_4V, VEX_WIG;
VR128, loadv2i64, i128mem, sched.XMM,
IsCommutable, 0>, VEX_4V, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
memopv2i64, i128mem, sched, IsCommutable, 1>;
memopv2i64, i128mem, sched.XMM, IsCommutable, 1>;
let Predicates = [HasAVX2, prd] in
defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
OpVT256, VR256, loadv4i64, i256mem, sched,
OpVT256, VR256, loadv4i64, i256mem, sched.YMM,
IsCommutable, 0>, VEX_4V, VEX_L, VEX_WIG;
}
// These are ordered here for pattern ordering requirements with the fp versions
defm PAND : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64,
WriteVecLogic, 1, NoVLX>;
SchedWriteVecLogic, 1, NoVLX>;
defm POR : PDI_binop_all<0xEB, "por", or, v2i64, v4i64,
WriteVecLogic, 1, NoVLX>;
SchedWriteVecLogic, 1, NoVLX>;
defm PXOR : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64,
WriteVecLogic, 1, NoVLX>;
SchedWriteVecLogic, 1, NoVLX>;
defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64,
WriteVecLogic, 0, NoVLX>;
SchedWriteVecLogic, 0, NoVLX>;
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Logical Instructions
@ -3305,57 +3306,57 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
} // ExeDomain = SSEPackedInt
defm PADDB : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8,
WriteVecALU, 1, NoVLX_Or_NoBWI>;
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PADDW : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16,
WriteVecALU, 1, NoVLX_Or_NoBWI>;
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PADDD : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32,
WriteVecALU, 1, NoVLX>;
SchedWriteVecALU, 1, NoVLX>;
defm PADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64,
WriteVecALU, 1, NoVLX>;
SchedWriteVecALU, 1, NoVLX>;
defm PADDSB : PDI_binop_all<0xEC, "paddsb", X86adds, v16i8, v32i8,
WriteVecALU, 1, NoVLX_Or_NoBWI>;
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PADDSW : PDI_binop_all<0xED, "paddsw", X86adds, v8i16, v16i16,
WriteVecALU, 1, NoVLX_Or_NoBWI>;
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PADDUSB : PDI_binop_all<0xDC, "paddusb", X86addus, v16i8, v32i8,
WriteVecALU, 1, NoVLX_Or_NoBWI>;
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PADDUSW : PDI_binop_all<0xDD, "paddusw", X86addus, v8i16, v16i16,
WriteVecALU, 1, NoVLX_Or_NoBWI>;
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PMULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16,
WriteVecIMul, 1, NoVLX_Or_NoBWI>;
SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>;
defm PMULHUW : PDI_binop_all<0xE4, "pmulhuw", mulhu, v8i16, v16i16,
WriteVecIMul, 1, NoVLX_Or_NoBWI>;
SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>;
defm PMULHW : PDI_binop_all<0xE5, "pmulhw", mulhs, v8i16, v16i16,
WriteVecIMul, 1, NoVLX_Or_NoBWI>;
SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>;
defm PSUBB : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8,
WriteVecALU, 0, NoVLX_Or_NoBWI>;
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PSUBW : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16,
WriteVecALU, 0, NoVLX_Or_NoBWI>;
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PSUBD : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32,
WriteVecALU, 0, NoVLX>;
SchedWriteVecALU, 0, NoVLX>;
defm PSUBQ : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64,
WriteVecALU, 0, NoVLX>;
SchedWriteVecALU, 0, NoVLX>;
defm PSUBSB : PDI_binop_all<0xE8, "psubsb", X86subs, v16i8, v32i8,
WriteVecALU, 0, NoVLX_Or_NoBWI>;
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PSUBSW : PDI_binop_all<0xE9, "psubsw", X86subs, v8i16, v16i16,
WriteVecALU, 0, NoVLX_Or_NoBWI>;
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8,
WriteVecALU, 0, NoVLX_Or_NoBWI>;
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16,
WriteVecALU, 0, NoVLX_Or_NoBWI>;
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PMINUB : PDI_binop_all<0xDA, "pminub", umin, v16i8, v32i8,
WriteVecALU, 1, NoVLX_Or_NoBWI>;
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PMINSW : PDI_binop_all<0xEA, "pminsw", smin, v8i16, v16i16,
WriteVecALU, 1, NoVLX_Or_NoBWI>;
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", umax, v16i8, v32i8,
WriteVecALU, 1, NoVLX_Or_NoBWI>;
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", smax, v8i16, v16i16,
WriteVecALU, 1, NoVLX_Or_NoBWI>;
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PAVGB : PDI_binop_all<0xE0, "pavgb", X86avg, v16i8, v32i8,
WriteVecALU, 1, NoVLX_Or_NoBWI>;
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PAVGW : PDI_binop_all<0xE3, "pavgw", X86avg, v8i16, v16i16,
WriteVecALU, 1, NoVLX_Or_NoBWI>;
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PMULUDQ : PDI_binop_all<0xF4, "pmuludq", X86pmuludq, v2i64, v4i64,
WriteVecIMul, 1, NoVLX>;
SchedWriteVecIMul, 1, NoVLX>;
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
defm VPMADDWD : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v4i32, v8i16, VR128,
@ -3493,17 +3494,17 @@ let ExeDomain = SSEPackedInt in {
//===---------------------------------------------------------------------===//
defm PCMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8,
WriteVecALU, 1, TruePredicate>;
SchedWriteVecALU, 1, TruePredicate>;
defm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16,
WriteVecALU, 1, TruePredicate>;
SchedWriteVecALU, 1, TruePredicate>;
defm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32,
WriteVecALU, 1, TruePredicate>;
SchedWriteVecALU, 1, TruePredicate>;
defm PCMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8,
WriteVecALU, 0, TruePredicate>;
SchedWriteVecALU, 0, TruePredicate>;
defm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16,
WriteVecALU, 0, TruePredicate>;
SchedWriteVecALU, 0, TruePredicate>;
defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32,
WriteVecALU, 0, TruePredicate>;
SchedWriteVecALU, 0, TruePredicate>;
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Shuffle Instructions
@ -4184,7 +4185,7 @@ def MOVPQI2QImr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
// For disassembler only
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
SchedRW = [WriteVecLogic] in {
SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVPQI2QIrr : VS2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_WIG;
def MOVPQI2QIrr : S2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
@ -4226,7 +4227,7 @@ let Predicates = [UseSSE2], AddedComplexity = 20 in {
// Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in
// IA32 document. movq xmm1, xmm2 does clear the high bits.
//
let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in {
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
let AddedComplexity = 15 in
def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}",
@ -5650,56 +5651,61 @@ let Defs = [EFLAGS], Predicates = [HasAVX] in {
def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
Sched<[WriteVecLogic]>, VEX, VEX_WIG;
Sched<[SchedWriteVecLogic.XMM]>, VEX, VEX_WIG;
def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS,(X86ptest VR128:$src1, (loadv2i64 addr:$src2)))]>,
Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX, VEX_WIG;
Sched<[SchedWriteVecLogic.XMM.Folded, ReadAfterLd]>,
VEX, VEX_WIG;
def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
Sched<[WriteVecLogic]>, VEX, VEX_L, VEX_WIG;
Sched<[SchedWriteVecLogic.YMM]>, VEX, VEX_L, VEX_WIG;
def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS,(X86ptest VR256:$src1, (loadv4i64 addr:$src2)))]>,
Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX, VEX_L, VEX_WIG;
Sched<[SchedWriteVecLogic.YMM.Folded, ReadAfterLd]>,
VEX, VEX_L, VEX_WIG;
}
let Defs = [EFLAGS] in {
def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
"ptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
Sched<[WriteVecLogic]>;
Sched<[SchedWriteVecLogic.XMM]>;
def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
"ptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
Sched<[WriteVecLogicLd, ReadAfterLd]>;
Sched<[SchedWriteVecLogic.XMM.Folded, ReadAfterLd]>;
}
// The bit test instructions below are AVX only
multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
X86MemOperand x86memop, PatFrag mem_frag, ValueType vt> {
X86MemOperand x86memop, PatFrag mem_frag, ValueType vt,
X86FoldableSchedWrite sched> {
def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>,
Sched<[WriteVecLogic]>, VEX;
Sched<[sched]>, VEX;
def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX;
Sched<[sched.Folded, ReadAfterLd]>, VEX;
}
let Defs = [EFLAGS], Predicates = [HasAVX] in {
let ExeDomain = SSEPackedSingle in {
defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, loadv4f32, v4f32>;
defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, loadv8f32, v8f32>,
VEX_L;
defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, loadv4f32, v4f32,
SchedWriteVecLogic.XMM>;
defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, loadv8f32, v8f32,
SchedWriteVecLogic.YMM>, VEX_L;
}
let ExeDomain = SSEPackedDouble in {
defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, loadv2f64, v2f64>;
defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, loadv4f64, v4f64>,
VEX_L;
defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, loadv2f64, v2f64,
SchedWriteVecLogic.XMM>;
defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, loadv4f64, v4f64,
SchedWriteVecLogic.YMM>, VEX_L;
}
}