mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[X86] Convert all uses of WriteFLogic/WriteVecLogic to X86SchedWriteWidths.
In preparation for splitting WriteVecLogic by vector width. llvm-svn: 331256
This commit is contained in:
parent
6f1ed8a0db
commit
e780627a3e
@ -2830,7 +2830,8 @@ multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
|
||||
sched, HasBWI>, VEX, PS, VEX_W;
|
||||
}
|
||||
|
||||
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, WriteVecLogic>;
|
||||
// TODO - do we need a X86SchedWriteWidths::KMASK type?
|
||||
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
|
||||
|
||||
// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
|
||||
let Predicates = [HasAVX512, NoDQI] in
|
||||
@ -2876,12 +2877,13 @@ def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
|
||||
def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
|
||||
def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
|
||||
|
||||
defm KAND : avx512_mask_binop_all<0x41, "kand", and, WriteVecLogic, 1>;
|
||||
defm KOR : avx512_mask_binop_all<0x45, "kor", or, WriteVecLogic, 1>;
|
||||
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, WriteVecLogic, 1>;
|
||||
defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, WriteVecLogic, 1>;
|
||||
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, WriteVecLogic, 0>;
|
||||
defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, WriteVecLogic, 1, HasDQI>;
|
||||
// TODO - do we need a X86SchedWriteWidths::KMASK type?
|
||||
defm KAND : avx512_mask_binop_all<0x41, "kand", and, SchedWriteVecLogic.XMM, 1>;
|
||||
defm KOR : avx512_mask_binop_all<0x45, "kor", or, SchedWriteVecLogic.XMM, 1>;
|
||||
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SchedWriteVecLogic.XMM, 1>;
|
||||
defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SchedWriteVecLogic.XMM, 1>;
|
||||
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SchedWriteVecLogic.XMM, 0>;
|
||||
defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
|
||||
|
||||
multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
|
||||
Instruction Inst> {
|
||||
@ -2960,8 +2962,9 @@ multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
VEX, PD, VEX_W;
|
||||
}
|
||||
|
||||
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, WriteVecLogic>;
|
||||
defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, WriteVecLogic, HasDQI>;
|
||||
// TODO - do we need a X86SchedWriteWidths::KMASK type?
|
||||
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
|
||||
defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
|
||||
|
||||
// Mask shift
|
||||
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
|
||||
@ -3718,7 +3721,7 @@ let hasSideEffects = 0 in
|
||||
def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
|
||||
(ins VR128X:$src),
|
||||
"vmovq.s\t{$src, $dst|$dst, $src}", []>,
|
||||
EVEX, VEX_W, Sched<[WriteVecLogic]>;
|
||||
EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
|
||||
} // ExeDomain = SSEPackedInt
|
||||
|
||||
// Move Scalar Single to Double Int
|
||||
@ -4149,7 +4152,7 @@ let Predicates = [HasAVX512] in {
|
||||
(VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
|
||||
}
|
||||
|
||||
let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in {
|
||||
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
|
||||
let AddedComplexity = 15 in
|
||||
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
|
||||
(ins VR128X:$src),
|
||||
@ -4763,23 +4766,23 @@ multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr,
|
||||
|
||||
multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr,
|
||||
SDPatternOperator OpNode,
|
||||
SDNode OpNodeMsk, X86FoldableSchedWrite sched,
|
||||
SDNode OpNodeMsk, X86SchedWriteWidths sched,
|
||||
AVX512VLVectorVTInfo VTInfo,
|
||||
bit IsCommutable = 0> {
|
||||
let Predicates = [HasAVX512] in
|
||||
defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched,
|
||||
defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.ZMM,
|
||||
VTInfo.info512, IsCommutable>, EVEX_V512;
|
||||
|
||||
let Predicates = [HasAVX512, HasVLX] in {
|
||||
defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched,
|
||||
defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.YMM,
|
||||
VTInfo.info256, IsCommutable>, EVEX_V256;
|
||||
defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched,
|
||||
defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.XMM,
|
||||
VTInfo.info128, IsCommutable>, EVEX_V128;
|
||||
}
|
||||
}
|
||||
|
||||
multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
|
||||
SDNode OpNode, X86FoldableSchedWrite sched,
|
||||
SDNode OpNode, X86SchedWriteWidths sched,
|
||||
bit IsCommutable = 0> {
|
||||
defm Q : avx512_logic_rmb_vl<opc_q, OpcodeStr#"q", OpNode, OpNode, sched,
|
||||
avx512vl_i64_info, IsCommutable>,
|
||||
@ -4789,10 +4792,14 @@ multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
|
||||
EVEX_CD8<32, CD8VF>;
|
||||
}
|
||||
|
||||
defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and, WriteVecLogic, 1>;
|
||||
defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or, WriteVecLogic, 1>;
|
||||
defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor, WriteVecLogic, 1>;
|
||||
defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp, WriteVecLogic>;
|
||||
defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and,
|
||||
SchedWriteVecLogic, 1>;
|
||||
defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or,
|
||||
SchedWriteVecLogic, 1>;
|
||||
defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
|
||||
SchedWriteVecLogic, 1>;
|
||||
defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
|
||||
SchedWriteVecLogic>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AVX-512 FP arithmetic
|
||||
@ -5007,83 +5014,86 @@ multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
|
||||
}
|
||||
|
||||
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
|
||||
Predicate prd, X86FoldableSchedWrite sched,
|
||||
X86FoldableSchedWrite schedY,
|
||||
Predicate prd, X86SchedWriteWidths sched,
|
||||
bit IsCommutable = 0> {
|
||||
let Predicates = [prd] in {
|
||||
defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
|
||||
schedY, IsCommutable>, EVEX_V512, PS,
|
||||
sched.ZMM, IsCommutable>, EVEX_V512, PS,
|
||||
EVEX_CD8<32, CD8VF>;
|
||||
defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
|
||||
schedY, IsCommutable>, EVEX_V512, PD, VEX_W,
|
||||
sched.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
|
||||
EVEX_CD8<64, CD8VF>;
|
||||
}
|
||||
|
||||
// Define only if AVX512VL feature is present.
|
||||
let Predicates = [prd, HasVLX] in {
|
||||
defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
|
||||
sched, IsCommutable>, EVEX_V128, PS,
|
||||
sched.XMM, IsCommutable>, EVEX_V128, PS,
|
||||
EVEX_CD8<32, CD8VF>;
|
||||
defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
|
||||
schedY, IsCommutable>, EVEX_V256, PS,
|
||||
sched.YMM, IsCommutable>, EVEX_V256, PS,
|
||||
EVEX_CD8<32, CD8VF>;
|
||||
defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
|
||||
sched, IsCommutable>, EVEX_V128, PD, VEX_W,
|
||||
sched.XMM, IsCommutable>, EVEX_V128, PD, VEX_W,
|
||||
EVEX_CD8<64, CD8VF>;
|
||||
defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
|
||||
schedY, IsCommutable>, EVEX_V256, PD, VEX_W,
|
||||
sched.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
|
||||
EVEX_CD8<64, CD8VF>;
|
||||
}
|
||||
}
|
||||
|
||||
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
|
||||
X86FoldableSchedWrite sched> {
|
||||
defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched, v16f32_info>,
|
||||
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
|
||||
defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched, v8f64_info>,
|
||||
EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
|
||||
X86SchedWriteWidths sched> {
|
||||
defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
|
||||
v16f32_info>,
|
||||
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
|
||||
defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
|
||||
v8f64_info>,
|
||||
EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
|
||||
}
|
||||
|
||||
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
|
||||
X86FoldableSchedWrite sched> {
|
||||
defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched, v16f32_info>,
|
||||
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
|
||||
defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched, v8f64_info>,
|
||||
EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
|
||||
X86SchedWriteWidths sched> {
|
||||
defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
|
||||
v16f32_info>,
|
||||
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
|
||||
defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
|
||||
v8f64_info>,
|
||||
EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
|
||||
}
|
||||
|
||||
defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
|
||||
WriteFAdd, WriteFAdd, 1>,
|
||||
avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, WriteFAdd>;
|
||||
SchedWriteFAdd, 1>,
|
||||
avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAdd>;
|
||||
defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
|
||||
WriteFMul, WriteFMul, 1>,
|
||||
avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, WriteFMul>;
|
||||
SchedWriteFMul, 1>,
|
||||
avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMul>;
|
||||
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512,
|
||||
WriteFAdd, WriteFAdd>,
|
||||
avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, WriteFAdd>;
|
||||
SchedWriteFAdd>,
|
||||
avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAdd>;
|
||||
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512,
|
||||
WriteFDiv, WriteFDiv>,
|
||||
avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, WriteFDiv>;
|
||||
SchedWriteFDiv>,
|
||||
avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDiv>;
|
||||
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
|
||||
WriteFCmp, WriteFCmp, 0>,
|
||||
avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, WriteFCmp>;
|
||||
SchedWriteFCmp, 0>,
|
||||
avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, SchedWriteFCmp>;
|
||||
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
|
||||
WriteFCmp, WriteFCmp, 0>,
|
||||
avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, WriteFCmp>;
|
||||
SchedWriteFCmp, 0>,
|
||||
avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, SchedWriteFCmp>;
|
||||
let isCodeGenOnly = 1 in {
|
||||
defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
|
||||
WriteFCmp, WriteFCmp, 1>;
|
||||
SchedWriteFCmp, 1>;
|
||||
defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
|
||||
WriteFCmp, WriteFCmp, 1>;
|
||||
SchedWriteFCmp, 1>;
|
||||
}
|
||||
defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
|
||||
WriteFLogic, WriteFLogicY, 1>;
|
||||
SchedWriteFLogic, 1>;
|
||||
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
|
||||
WriteFLogic, WriteFLogicY, 0>;
|
||||
SchedWriteFLogic, 0>;
|
||||
defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
|
||||
WriteFLogic, WriteFLogicY, 1>;
|
||||
SchedWriteFLogic, 1>;
|
||||
defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
|
||||
WriteFLogic, WriteFLogicY, 1>;
|
||||
SchedWriteFLogic, 1>;
|
||||
|
||||
// Patterns catch floating point selects with bitcasted integer logic ops.
|
||||
multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
|
||||
@ -5355,17 +5365,17 @@ multiclass avx512_vptest_lowering<PatFrag OpNode, X86VectorVTInfo ExtendInfo,
|
||||
}
|
||||
|
||||
multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, PatFrag OpNode,
|
||||
X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _,
|
||||
X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
|
||||
string Suffix> {
|
||||
let Predicates = [HasAVX512] in
|
||||
defm Z : avx512_vptest<opc, OpcodeStr, OpNode, sched, _.info512, Suffix>,
|
||||
avx512_vptest_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
|
||||
defm Z : avx512_vptest<opc, OpcodeStr, OpNode, sched.ZMM, _.info512, Suffix>,
|
||||
avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
|
||||
|
||||
let Predicates = [HasAVX512, HasVLX] in {
|
||||
defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, sched, _.info256, Suffix>,
|
||||
avx512_vptest_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
|
||||
defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, sched, _.info128, Suffix>,
|
||||
avx512_vptest_mb<opc, OpcodeStr, OpNode, sched, _.info128>, EVEX_V128;
|
||||
defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, sched.YMM, _.info256, Suffix>,
|
||||
avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
|
||||
defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, sched.XMM, _.info128, Suffix>,
|
||||
avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
|
||||
}
|
||||
let Predicates = [HasAVX512, NoVLX] in {
|
||||
defm Z256_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info256, Suffix>;
|
||||
@ -5374,7 +5384,7 @@ multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, PatFrag OpNode,
|
||||
}
|
||||
|
||||
multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, PatFrag OpNode,
|
||||
X86FoldableSchedWrite sched> {
|
||||
X86SchedWriteWidths sched> {
|
||||
defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, sched,
|
||||
avx512vl_i32_info, "D">;
|
||||
defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode, sched,
|
||||
@ -5382,23 +5392,23 @@ multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, PatFrag OpNode,
|
||||
}
|
||||
|
||||
multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
|
||||
PatFrag OpNode, X86FoldableSchedWrite sched> {
|
||||
PatFrag OpNode, X86SchedWriteWidths sched> {
|
||||
let Predicates = [HasBWI] in {
|
||||
defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched, v32i16_info, "W">,
|
||||
EVEX_V512, VEX_W;
|
||||
defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched, v64i8_info, "B">,
|
||||
EVEX_V512;
|
||||
defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.ZMM,
|
||||
v32i16_info, "W">, EVEX_V512, VEX_W;
|
||||
defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.ZMM,
|
||||
v64i8_info, "B">, EVEX_V512;
|
||||
}
|
||||
let Predicates = [HasVLX, HasBWI] in {
|
||||
|
||||
defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched, v16i16x_info, "W">,
|
||||
EVEX_V256, VEX_W;
|
||||
defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched, v8i16x_info, "W">,
|
||||
EVEX_V128, VEX_W;
|
||||
defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched, v32i8x_info, "B">,
|
||||
EVEX_V256;
|
||||
defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched, v16i8x_info, "B">,
|
||||
EVEX_V128;
|
||||
defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.YMM,
|
||||
v16i16x_info, "W">, EVEX_V256, VEX_W;
|
||||
defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.XMM,
|
||||
v8i16x_info, "W">, EVEX_V128, VEX_W;
|
||||
defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.YMM,
|
||||
v32i8x_info, "B">, EVEX_V256;
|
||||
defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.XMM,
|
||||
v16i8x_info, "B">, EVEX_V128;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX512, NoVLX] in {
|
||||
@ -5418,19 +5428,19 @@ def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2),
|
||||
(X86cmpm node:$src1, node:$src2, (i8 4))>;
|
||||
|
||||
multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
|
||||
PatFrag OpNode, X86FoldableSchedWrite sched> :
|
||||
avx512_vptest_wb <opc_wb, OpcodeStr, OpNode, sched>,
|
||||
PatFrag OpNode, X86SchedWriteWidths sched> :
|
||||
avx512_vptest_wb<opc_wb, OpcodeStr, OpNode, sched>,
|
||||
avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, sched>;
|
||||
|
||||
defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86pcmpnem,
|
||||
WriteVecLogic>, T8PD;
|
||||
SchedWriteVecLogic>, T8PD;
|
||||
defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86pcmpeqm,
|
||||
WriteVecLogic>, T8XS;
|
||||
|
||||
SchedWriteVecLogic>, T8XS;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AVX-512 Shift instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
|
||||
string OpcodeStr, SDNode OpNode,
|
||||
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
|
||||
@ -9886,9 +9896,9 @@ def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$sr
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
|
||||
WriteFShuffle, WriteFShuffle>;
|
||||
SchedWriteFShuffle>;
|
||||
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
|
||||
WriteFShuffle, WriteFShuffle>;
|
||||
SchedWriteFShuffle>;
|
||||
|
||||
defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
|
||||
WriteShuffle, HasBWI>;
|
||||
|
@ -2296,32 +2296,33 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
|
||||
multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
|
||||
ValueType OpVT128, ValueType OpVT256,
|
||||
X86FoldableSchedWrite sched, bit IsCommutable,
|
||||
X86SchedWriteWidths sched, bit IsCommutable,
|
||||
Predicate prd> {
|
||||
let Predicates = [HasAVX, prd] in
|
||||
defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
|
||||
VR128, loadv2i64, i128mem, sched, IsCommutable, 0>, VEX_4V, VEX_WIG;
|
||||
VR128, loadv2i64, i128mem, sched.XMM,
|
||||
IsCommutable, 0>, VEX_4V, VEX_WIG;
|
||||
|
||||
let Constraints = "$src1 = $dst" in
|
||||
defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
|
||||
memopv2i64, i128mem, sched, IsCommutable, 1>;
|
||||
memopv2i64, i128mem, sched.XMM, IsCommutable, 1>;
|
||||
|
||||
let Predicates = [HasAVX2, prd] in
|
||||
defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
|
||||
OpVT256, VR256, loadv4i64, i256mem, sched,
|
||||
OpVT256, VR256, loadv4i64, i256mem, sched.YMM,
|
||||
IsCommutable, 0>, VEX_4V, VEX_L, VEX_WIG;
|
||||
}
|
||||
|
||||
// These are ordered here for pattern ordering requirements with the fp versions
|
||||
|
||||
defm PAND : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64,
|
||||
WriteVecLogic, 1, NoVLX>;
|
||||
SchedWriteVecLogic, 1, NoVLX>;
|
||||
defm POR : PDI_binop_all<0xEB, "por", or, v2i64, v4i64,
|
||||
WriteVecLogic, 1, NoVLX>;
|
||||
SchedWriteVecLogic, 1, NoVLX>;
|
||||
defm PXOR : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64,
|
||||
WriteVecLogic, 1, NoVLX>;
|
||||
SchedWriteVecLogic, 1, NoVLX>;
|
||||
defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64,
|
||||
WriteVecLogic, 0, NoVLX>;
|
||||
SchedWriteVecLogic, 0, NoVLX>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SSE 1 & 2 - Logical Instructions
|
||||
@ -3305,57 +3306,57 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
} // ExeDomain = SSEPackedInt
|
||||
|
||||
defm PADDB : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8,
|
||||
WriteVecALU, 1, NoVLX_Or_NoBWI>;
|
||||
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
|
||||
defm PADDW : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16,
|
||||
WriteVecALU, 1, NoVLX_Or_NoBWI>;
|
||||
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
|
||||
defm PADDD : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32,
|
||||
WriteVecALU, 1, NoVLX>;
|
||||
SchedWriteVecALU, 1, NoVLX>;
|
||||
defm PADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64,
|
||||
WriteVecALU, 1, NoVLX>;
|
||||
SchedWriteVecALU, 1, NoVLX>;
|
||||
defm PADDSB : PDI_binop_all<0xEC, "paddsb", X86adds, v16i8, v32i8,
|
||||
WriteVecALU, 1, NoVLX_Or_NoBWI>;
|
||||
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
|
||||
defm PADDSW : PDI_binop_all<0xED, "paddsw", X86adds, v8i16, v16i16,
|
||||
WriteVecALU, 1, NoVLX_Or_NoBWI>;
|
||||
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
|
||||
defm PADDUSB : PDI_binop_all<0xDC, "paddusb", X86addus, v16i8, v32i8,
|
||||
WriteVecALU, 1, NoVLX_Or_NoBWI>;
|
||||
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
|
||||
defm PADDUSW : PDI_binop_all<0xDD, "paddusw", X86addus, v8i16, v16i16,
|
||||
WriteVecALU, 1, NoVLX_Or_NoBWI>;
|
||||
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
|
||||
defm PMULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16,
|
||||
WriteVecIMul, 1, NoVLX_Or_NoBWI>;
|
||||
SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>;
|
||||
defm PMULHUW : PDI_binop_all<0xE4, "pmulhuw", mulhu, v8i16, v16i16,
|
||||
WriteVecIMul, 1, NoVLX_Or_NoBWI>;
|
||||
SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>;
|
||||
defm PMULHW : PDI_binop_all<0xE5, "pmulhw", mulhs, v8i16, v16i16,
|
||||
WriteVecIMul, 1, NoVLX_Or_NoBWI>;
|
||||
SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>;
|
||||
defm PSUBB : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8,
|
||||
WriteVecALU, 0, NoVLX_Or_NoBWI>;
|
||||
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
|
||||
defm PSUBW : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16,
|
||||
WriteVecALU, 0, NoVLX_Or_NoBWI>;
|
||||
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
|
||||
defm PSUBD : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32,
|
||||
WriteVecALU, 0, NoVLX>;
|
||||
SchedWriteVecALU, 0, NoVLX>;
|
||||
defm PSUBQ : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64,
|
||||
WriteVecALU, 0, NoVLX>;
|
||||
SchedWriteVecALU, 0, NoVLX>;
|
||||
defm PSUBSB : PDI_binop_all<0xE8, "psubsb", X86subs, v16i8, v32i8,
|
||||
WriteVecALU, 0, NoVLX_Or_NoBWI>;
|
||||
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
|
||||
defm PSUBSW : PDI_binop_all<0xE9, "psubsw", X86subs, v8i16, v16i16,
|
||||
WriteVecALU, 0, NoVLX_Or_NoBWI>;
|
||||
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
|
||||
defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8,
|
||||
WriteVecALU, 0, NoVLX_Or_NoBWI>;
|
||||
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
|
||||
defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16,
|
||||
WriteVecALU, 0, NoVLX_Or_NoBWI>;
|
||||
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
|
||||
defm PMINUB : PDI_binop_all<0xDA, "pminub", umin, v16i8, v32i8,
|
||||
WriteVecALU, 1, NoVLX_Or_NoBWI>;
|
||||
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
|
||||
defm PMINSW : PDI_binop_all<0xEA, "pminsw", smin, v8i16, v16i16,
|
||||
WriteVecALU, 1, NoVLX_Or_NoBWI>;
|
||||
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
|
||||
defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", umax, v16i8, v32i8,
|
||||
WriteVecALU, 1, NoVLX_Or_NoBWI>;
|
||||
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
|
||||
defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", smax, v8i16, v16i16,
|
||||
WriteVecALU, 1, NoVLX_Or_NoBWI>;
|
||||
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
|
||||
defm PAVGB : PDI_binop_all<0xE0, "pavgb", X86avg, v16i8, v32i8,
|
||||
WriteVecALU, 1, NoVLX_Or_NoBWI>;
|
||||
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
|
||||
defm PAVGW : PDI_binop_all<0xE3, "pavgw", X86avg, v8i16, v16i16,
|
||||
WriteVecALU, 1, NoVLX_Or_NoBWI>;
|
||||
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
|
||||
defm PMULUDQ : PDI_binop_all<0xF4, "pmuludq", X86pmuludq, v2i64, v4i64,
|
||||
WriteVecIMul, 1, NoVLX>;
|
||||
SchedWriteVecIMul, 1, NoVLX>;
|
||||
|
||||
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
|
||||
defm VPMADDWD : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v4i32, v8i16, VR128,
|
||||
@ -3493,17 +3494,17 @@ let ExeDomain = SSEPackedInt in {
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
defm PCMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8,
|
||||
WriteVecALU, 1, TruePredicate>;
|
||||
SchedWriteVecALU, 1, TruePredicate>;
|
||||
defm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16,
|
||||
WriteVecALU, 1, TruePredicate>;
|
||||
SchedWriteVecALU, 1, TruePredicate>;
|
||||
defm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32,
|
||||
WriteVecALU, 1, TruePredicate>;
|
||||
SchedWriteVecALU, 1, TruePredicate>;
|
||||
defm PCMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8,
|
||||
WriteVecALU, 0, TruePredicate>;
|
||||
SchedWriteVecALU, 0, TruePredicate>;
|
||||
defm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16,
|
||||
WriteVecALU, 0, TruePredicate>;
|
||||
SchedWriteVecALU, 0, TruePredicate>;
|
||||
defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32,
|
||||
WriteVecALU, 0, TruePredicate>;
|
||||
SchedWriteVecALU, 0, TruePredicate>;
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
// SSE2 - Packed Integer Shuffle Instructions
|
||||
@ -4184,7 +4185,7 @@ def MOVPQI2QImr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
|
||||
|
||||
// For disassembler only
|
||||
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
|
||||
SchedRW = [WriteVecLogic] in {
|
||||
SchedRW = [SchedWriteVecLogic.XMM] in {
|
||||
def VMOVPQI2QIrr : VS2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
"movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_WIG;
|
||||
def MOVPQI2QIrr : S2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
@ -4226,7 +4227,7 @@ let Predicates = [UseSSE2], AddedComplexity = 20 in {
|
||||
// Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in
|
||||
// IA32 document. movq xmm1, xmm2 does clear the high bits.
|
||||
//
|
||||
let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in {
|
||||
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
|
||||
let AddedComplexity = 15 in
|
||||
def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
"vmovq\t{$src, $dst|$dst, $src}",
|
||||
@ -5650,56 +5651,61 @@ let Defs = [EFLAGS], Predicates = [HasAVX] in {
|
||||
def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
|
||||
"vptest\t{$src2, $src1|$src1, $src2}",
|
||||
[(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
|
||||
Sched<[WriteVecLogic]>, VEX, VEX_WIG;
|
||||
Sched<[SchedWriteVecLogic.XMM]>, VEX, VEX_WIG;
|
||||
def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
|
||||
"vptest\t{$src2, $src1|$src1, $src2}",
|
||||
[(set EFLAGS,(X86ptest VR128:$src1, (loadv2i64 addr:$src2)))]>,
|
||||
Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX, VEX_WIG;
|
||||
Sched<[SchedWriteVecLogic.XMM.Folded, ReadAfterLd]>,
|
||||
VEX, VEX_WIG;
|
||||
|
||||
def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
|
||||
"vptest\t{$src2, $src1|$src1, $src2}",
|
||||
[(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
|
||||
Sched<[WriteVecLogic]>, VEX, VEX_L, VEX_WIG;
|
||||
Sched<[SchedWriteVecLogic.YMM]>, VEX, VEX_L, VEX_WIG;
|
||||
def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
|
||||
"vptest\t{$src2, $src1|$src1, $src2}",
|
||||
[(set EFLAGS,(X86ptest VR256:$src1, (loadv4i64 addr:$src2)))]>,
|
||||
Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX, VEX_L, VEX_WIG;
|
||||
Sched<[SchedWriteVecLogic.YMM.Folded, ReadAfterLd]>,
|
||||
VEX, VEX_L, VEX_WIG;
|
||||
}
|
||||
|
||||
let Defs = [EFLAGS] in {
|
||||
def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
|
||||
"ptest\t{$src2, $src1|$src1, $src2}",
|
||||
[(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
|
||||
Sched<[WriteVecLogic]>;
|
||||
Sched<[SchedWriteVecLogic.XMM]>;
|
||||
def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
|
||||
"ptest\t{$src2, $src1|$src1, $src2}",
|
||||
[(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
|
||||
Sched<[WriteVecLogicLd, ReadAfterLd]>;
|
||||
Sched<[SchedWriteVecLogic.XMM.Folded, ReadAfterLd]>;
|
||||
}
|
||||
|
||||
// The bit test instructions below are AVX only
|
||||
multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
|
||||
X86MemOperand x86memop, PatFrag mem_frag, ValueType vt> {
|
||||
X86MemOperand x86memop, PatFrag mem_frag, ValueType vt,
|
||||
X86FoldableSchedWrite sched> {
|
||||
def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
|
||||
[(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>,
|
||||
Sched<[WriteVecLogic]>, VEX;
|
||||
Sched<[sched]>, VEX;
|
||||
def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
|
||||
[(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
|
||||
Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX;
|
||||
Sched<[sched.Folded, ReadAfterLd]>, VEX;
|
||||
}
|
||||
|
||||
let Defs = [EFLAGS], Predicates = [HasAVX] in {
|
||||
let ExeDomain = SSEPackedSingle in {
|
||||
defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, loadv4f32, v4f32>;
|
||||
defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, loadv8f32, v8f32>,
|
||||
VEX_L;
|
||||
defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, loadv4f32, v4f32,
|
||||
SchedWriteVecLogic.XMM>;
|
||||
defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, loadv8f32, v8f32,
|
||||
SchedWriteVecLogic.YMM>, VEX_L;
|
||||
}
|
||||
let ExeDomain = SSEPackedDouble in {
|
||||
defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, loadv2f64, v2f64>;
|
||||
defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, loadv4f64, v4f64>,
|
||||
VEX_L;
|
||||
defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, loadv2f64, v2f64,
|
||||
SchedWriteVecLogic.XMM>;
|
||||
defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, loadv4f64, v4f64,
|
||||
SchedWriteVecLogic.YMM>, VEX_L;
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user