AVX-512: Embedded Rounding Control - encoding and printing
Changed intrinsics for vrcp14/vrcp28 and vrsqrt14/vrsqrt28 - aligned with GCC.

llvm-svn: 199102
parent 34e4c02c42
commit e635ade802
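For quick reference, a minimal LLVM IR usage sketch of the reworked intrinsic signatures, based on the declarations exercised by the updated tests in this diff (the function name @rcp_example is illustrative only). The passthrough/mask operands, and for the ERI variants the trailing i32 rounding/SAE operand (i32 8 corresponds to the {sae} form), are what this commit introduces:

define <16 x float> @rcp_example(<16 x float> %a) {
  ; rcp14 now takes a passthrough vector and a lane mask (i16 -1 = all lanes)
  %r14 = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a, <16 x float> zeroinitializer, i16 -1)
  ; the ERI rcp28 variant additionally takes an i32 rounding/SAE operand (8 selects {sae})
  %r28 = call <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float> %r14, <16 x float> zeroinitializer, i16 -1, i32 8)
  ret <16 x float> %r28
}
declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
declare <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone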
@@ -1248,6 +1248,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx_ptestnzc_256 : GCCBuiltin<"__builtin_ia32_ptestnzc256">,
Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty,
llvm_v4i64_ty], [IntrNoMem]>;
def int_x86_avx512_mask_ptestm_d_512 : GCCBuiltin<"__builtin_ia32_ptestmd512">,
Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_mask_ptestm_q_512 : GCCBuiltin<"__builtin_ia32_ptestmq512">,
Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
llvm_i8_ty], [IntrNoMem]>;
}

// Vector extract sign mask

@@ -1696,6 +1702,18 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_pbroadcastq_256 :
GCCBuiltin<"__builtin_ia32_pbroadcastq256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pbroadcast_d_gpr_512 :
GCCBuiltin<"__builtin_ia32_pbroadcastd512_gpr_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_i32_ty, llvm_v16i32_ty,
llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pbroadcast_q_gpr_512 :
GCCBuiltin<"__builtin_ia32_pbroadcastq512_gpr_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_i64_ty, llvm_v8i64_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pbroadcast_q_mem_512 :
GCCBuiltin<"__builtin_ia32_pbroadcastq512_mem_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_i64_ty, llvm_v8i64_ty,
llvm_i8_ty], [IntrNoMem]>;
}

// Vector permutation

@@ -2876,54 +2894,62 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_sqrt_ps_512 : GCCBuiltin<"__builtin_ia32_sqrtps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty], [IntrNoMem]>;

def int_x86_avx512_rcp14_ps_512 : GCCBuiltin<"__builtin_ia32_rcp14ps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty],
[IntrNoMem]>;
def int_x86_avx512_rcp14_pd_512 : GCCBuiltin<"__builtin_ia32_rcp14pd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty],
[IntrNoMem]>;
def int_x86_avx512_rcp14_ss : GCCBuiltin<"__builtin_ia32_rcp14ss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
[IntrNoMem]>;
def int_x86_avx512_rcp14_sd : GCCBuiltin<"__builtin_ia32_rcp14sd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty],
[IntrNoMem]>;
def int_x86_avx512_rsqrt14_ps_512 : GCCBuiltin<"__builtin_ia32_rsqrt14ps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty],
[IntrNoMem]>;
def int_x86_avx512_rsqrt14_pd_512 : GCCBuiltin<"__builtin_ia32_rsqrt14pd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty],
[IntrNoMem]>;
def int_x86_avx512_rsqrt14_ss : GCCBuiltin<"__builtin_ia32_rsqrt14ss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
[IntrNoMem]>;
def int_x86_avx512_rsqrt14_sd : GCCBuiltin<"__builtin_ia32_rsqrt14sd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty],
[IntrNoMem]>;
def int_x86_avx512_rsqrt14_ss : GCCBuiltin<"__builtin_ia32_rsqrt14ss_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_rsqrt14_sd : GCCBuiltin<"__builtin_ia32_rsqrt14sd_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty], [IntrNoMem]>;

def int_x86_avx512_rcp28_ps_512 : GCCBuiltin<"__builtin_ia32_rcp28ps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty],
def int_x86_avx512_rsqrt14_pd_512 : GCCBuiltin<"__builtin_ia32_rsqrt14pd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_rsqrt14_ps_512 : GCCBuiltin<"__builtin_ia32_rsqrt14ps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_rcp14_ss : GCCBuiltin<"__builtin_ia32_rcp14ss_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_rcp14_sd : GCCBuiltin<"__builtin_ia32_rcp14sd_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty], [IntrNoMem]>;

def int_x86_avx512_rcp14_pd_512 : GCCBuiltin<"__builtin_ia32_rcp14pd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_rcp14_ps_512 : GCCBuiltin<"__builtin_ia32_rcp14ps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i16_ty], [IntrNoMem]>;

def int_x86_avx512_rcp28_ps : GCCBuiltin<"__builtin_ia32_rcp28ps_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_rcp28_pd : GCCBuiltin<"__builtin_ia32_rcp28pd_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_rcp28_ss : GCCBuiltin<"__builtin_ia32_rcp28ss_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_avx512_rcp28_pd_512 : GCCBuiltin<"__builtin_ia32_rcp28pd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty],
def int_x86_avx512_rcp28_sd : GCCBuiltin<"__builtin_ia32_rcp28sd_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_avx512_rcp28_ss : GCCBuiltin<"__builtin_ia32_rcp28ss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
def int_x86_avx512_rsqrt28_ps : GCCBuiltin<"__builtin_ia32_rsqrt28ps_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i16_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_avx512_rcp28_sd : GCCBuiltin<"__builtin_ia32_rcp28sd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty],
def int_x86_avx512_rsqrt28_pd : GCCBuiltin<"__builtin_ia32_rsqrt28pd_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i8_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_avx512_rsqrt28_ps_512 : GCCBuiltin<"__builtin_ia32_rsqrt28ps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty],
def int_x86_avx512_rsqrt28_ss : GCCBuiltin<"__builtin_ia32_rsqrt28ss_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_avx512_rsqrt28_pd_512 : GCCBuiltin<"__builtin_ia32_rsqrt28pd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty],
[IntrNoMem]>;
def int_x86_avx512_rsqrt28_ss : GCCBuiltin<"__builtin_ia32_rsqrt28ss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
[IntrNoMem]>;
def int_x86_avx512_rsqrt28_sd : GCCBuiltin<"__builtin_ia32_rsqrt28sd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty],
def int_x86_avx512_rsqrt28_sd : GCCBuiltin<"__builtin_ia32_rsqrt28sd_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem]>;
}
@@ -125,14 +125,12 @@ void X86ATTInstPrinter::printAVXCC(const MCInst *MI, unsigned Op,

void X86ATTInstPrinter::printRoundingControl(const MCInst *MI, unsigned Op,
raw_ostream &O) {
int64_t Imm = MI->getOperand(Op).getImm() & 0x1f;
int64_t Imm = MI->getOperand(Op).getImm() & 0x3;
switch (Imm) {
case 0: O << "{rn-sae}"; break;
case 1: O << "{rd-sae}"; break;
case 2: O << "{ru-sae}"; break;
case 3: O << "{rz-sae}"; break;

default: llvm_unreachable("Invalid AVX-512 rounding control argument!");
}
}
/// printPCRelImm - This is used to print an immediate value that ends up

@@ -115,14 +115,12 @@ void X86IntelInstPrinter::printAVXCC(const MCInst *MI, unsigned Op,

void X86IntelInstPrinter::printRoundingControl(const MCInst *MI, unsigned Op,
raw_ostream &O) {
int64_t Imm = MI->getOperand(Op).getImm() & 0x1f;
int64_t Imm = MI->getOperand(Op).getImm() & 0x3;
switch (Imm) {
case 0: O << "{rn-sae}"; break;
case 1: O << "{rd-sae}"; break;
case 2: O << "{ru-sae}"; break;
case 3: O << "{rz-sae}"; break;

default: llvm_unreachable("Invalid AVX-512 rounding control argument!");
}
}
@@ -503,8 +503,10 @@ namespace X86II {
MemOp4 = 1U << 18,

/// XOP - Opcode prefix used by XOP instructions.
XOP = 1U << 19
XOP = 1U << 19,

/// Explicitly specified rounding control
EVEX_RC = 1U << 20
};

// getBaseOpcodeFor - This function returns the "base" X86 opcode for the

@@ -604,7 +604,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
bool HasEVEX_RC = false;
bool HasEVEX_RC = (TSFlags >> X86II::VEXShift) & X86II::EVEX_RC;

// VEX_R: opcode externsion equivalent to REX.R in
// 1's complement (inverted) form

@@ -686,6 +686,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// EVEX_aaa
unsigned char EVEX_aaa = 0;

bool EncodeRC = false;

// Encode the operand size opcode prefix as needed.
if (TSFlags & X86II::OpSize)
VEX_PP = 0x01;

@@ -749,7 +751,6 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,

// Classify VEX_B, VEX_4V, VEX_R, VEX_X
unsigned NumOps = Desc.getNumOperands();
unsigned RcOperand = NumOps-1;
unsigned CurOp = 0;
if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) == 0)
++CurOp;

@@ -910,10 +911,13 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
if (HasVEX_4VOp3)
VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
if (EVEX_b) {
assert(RcOperand >= CurOp);
EVEX_rc = MI.getOperand(RcOperand).getImm() & 0x3;
HasEVEX_RC = true;
}
if (HasEVEX_RC) {
unsigned RcOperand = NumOps-1;
assert(RcOperand >= CurOp);
EVEX_rc = MI.getOperand(RcOperand).getImm() & 0x3;
}
EncodeRC = true;
}
break;
case X86II::MRMDestReg:
// MRMDestReg instructions forms:

@@ -940,6 +944,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
VEX_R = 0x0;
if (HasEVEX && X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg()))
EVEX_R2 = 0x0;
if (EVEX_b)
EncodeRC = true;
break;
case X86II::MRM0r: case X86II::MRM1r:
case X86II::MRM2r: case X86II::MRM3r:

@@ -1013,7 +1019,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
(VEX_4V << 3) |
(EVEX_U << 2) |
VEX_PP, CurByte, OS);
if (HasEVEX_RC)
if (EncodeRC)
EmitByte((EVEX_z << 7) |
(EVEX_rc << 5) |
(EVEX_b << 4) |

@@ -1293,7 +1299,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
// It uses the EVEX.aaa field?
bool HasEVEX = (TSFlags >> X86II::VEXShift) & X86II::EVEX;
bool HasEVEX_K = HasEVEX && ((TSFlags >> X86II::VEXShift) & X86II::EVEX_K);
bool HasEVEX_B = HasEVEX && ((TSFlags >> X86II::VEXShift) & X86II::EVEX_B);
bool HasEVEX_RC = HasEVEX && ((TSFlags >> X86II::VEXShift) & X86II::EVEX_RC);

// Determine where the memory operand starts, if present.
int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode);

@@ -1391,7 +1397,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
if (HasVEX_4VOp3)
++CurOp;
// do not count the rounding control operand
if (HasEVEX_B)
if (HasEVEX_RC)
NumOps--;
break;
@@ -432,6 +432,13 @@ def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_i32_512 (i32 GR32:$src))),
def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_i64_512 (i64 GR64:$src))),
(VPBROADCASTQrZrr GR64:$src)>;

def : Pat<(v16i32 (int_x86_avx512_mask_pbroadcast_d_gpr_512 (i32 GR32:$src),
(v16i32 immAllZerosV), (i16 GR16:$mask))),
(VPBROADCASTDrZkrr (COPY_TO_REGCLASS GR16:$mask, VK16WM), GR32:$src)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pbroadcast_q_gpr_512 (i64 GR64:$src),
(bc_v8i64 (v16i32 immAllZerosV)), (i8 GR8:$mask))),
(VPBROADCASTQrZkrr (COPY_TO_REGCLASS GR8:$mask, VK8WM), GR64:$src)>;

multiclass avx512_int_broadcast_rm<bits<8> opc, string OpcodeStr,
X86MemOperand x86memop, PatFrag ld_frag,
RegisterClass DstRC, ValueType OpVT, ValueType SrcVT,

@@ -639,12 +646,12 @@ defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd",

def : Pat<(v16f32 (int_x86_avx512_mask_blend_ps_512 (v16f32 VR512:$src1),
(v16f32 VR512:$src2), (i16 GR16:$mask))),
(VBLENDMPSZrr (COPY_TO_REGCLASS GR16:$mask, VK16),
(VBLENDMPSZrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
VR512:$src1, VR512:$src2)>;

def : Pat<(v8f64 (int_x86_avx512_mask_blend_pd_512 (v8f64 VR512:$src1),
(v8f64 VR512:$src2), (i8 GR8:$mask))),
(VBLENDMPDZrr (COPY_TO_REGCLASS GR8:$mask, VK8),
(VBLENDMPDZrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
VR512:$src1, VR512:$src2)>;

defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd",

@@ -809,7 +816,7 @@ multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set KRC:$dst, (X86cmpm (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>;
def rrib: AVX512PIi8<0xC2, MRMSrcReg,
(outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc, i32imm:$sae),
(outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
!strconcat("vcmp${cc}", suffix,
"\t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
[], d>, EVEX_B;

@@ -859,13 +866,13 @@ def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
(v16f32 VR512:$src2), imm:$cc, (i16 -1),
FROUND_NO_EXC)),
(COPY_TO_REGCLASS (VCMPPSZrrib VR512:$src1, VR512:$src2,
(I8Imm imm:$cc), (i32 0)), GR16)>;
(I8Imm imm:$cc)), GR16)>;

def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
(v8f64 VR512:$src2), imm:$cc, (i8 -1),
FROUND_NO_EXC)),
(COPY_TO_REGCLASS (VCMPPDZrrib VR512:$src1, VR512:$src2,
(I8Imm imm:$cc), (i32 0)), GR8)>;
(I8Imm imm:$cc)), GR8)>;

def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
(v16f32 VR512:$src2), imm:$cc, (i16 -1),

@@ -949,6 +956,10 @@ let Predicates = [HasAVX512] in {
def : Pat<(i8 (zext VK1:$src)),
(EXTRACT_SUBREG
(KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_8bit)>;
def : Pat<(i64 (zext VK1:$src)),
(SUBREG_TO_REG (i64 0),
(KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit)>;

}
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
let Predicates = [HasAVX512] in {

@@ -1170,6 +1181,9 @@ defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
let Predicates = [HasAVX512] in {
def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
def : Pat<(i1 0), (COPY_TO_REGCLASS (KSET0W), VK1)>;
def : Pat<(i1 1), (COPY_TO_REGCLASS (KSET1W), VK1)>;
def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSET1W), VK1)>;
}
def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 0))),
(v8i1 (COPY_TO_REGCLASS VK16:$src, VK8))>;

@@ -2112,6 +2126,13 @@ defm VPTESTMQZ : avx512_vptest<0x27, "vptestmq", VK8, VR512, f512mem,
memopv8i64, X86testm, v8i64>, EVEX_V512, VEX_W,
EVEX_CD8<64, CD8VF>;

def : Pat <(i16 (int_x86_avx512_mask_ptestm_d_512 (v16i32 VR512:$src1),
(v16i32 VR512:$src2), (i16 -1))),
(COPY_TO_REGCLASS (VPTESTMDZrr VR512:$src1, VR512:$src2), GR16)>;

def : Pat <(i8 (int_x86_avx512_mask_ptestm_q_512 (v8i64 VR512:$src1),
(v8i64 VR512:$src2), (i8 -1))),
(COPY_TO_REGCLASS (VPTESTMDZrr VR512:$src1, VR512:$src2), GR8)>;
//===----------------------------------------------------------------------===//
// AVX-512 Shift instructions
//===----------------------------------------------------------------------===//
@@ -2791,7 +2812,7 @@ let hasSideEffects = 0 in {
(OpVT (OpNode (InVT SrcRC:$src))))], d>, EVEX;
def rrb : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc),
!strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
[], d>, EVEX, EVEX_B;
[], d>, EVEX, EVEX_B, EVEX_RC;
let mayLoad = 1 in
def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),

@@ -2916,7 +2937,7 @@ let hasSideEffects = 0 in {
[], d>, EVEX;
def rrb : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc),
!strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
[], d>, EVEX, EVEX_B;
[], d>, EVEX, EVEX_B, EVEX_RC;
let mayLoad = 1 in
def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
@@ -3025,152 +3046,184 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in {
}
}

/// avx512_unop_p - AVX-512 unops in packed form.
multiclass avx512_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode> {
def PSZr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
!strconcat(OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR512:$dst, (v16f32 (OpNode VR512:$src)))]>,
EVEX, EVEX_V512;
def PSZm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR512:$dst, (OpNode (memopv16f32 addr:$src)))]>,
EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
def PDZr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
!strconcat(OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR512:$dst, (v8f64 (OpNode VR512:$src)))]>,
EVEX, EVEX_V512, VEX_W;
def PDZm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
!strconcat(OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR512:$dst, (OpNode (memopv16f32 addr:$src)))]>,
EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}

/// avx512_fp_unop_p_int - AVX-512 intrinsics unops in packed forms.
multiclass avx512_fp_unop_p_int<bits<8> opc, string OpcodeStr,
Intrinsic V16F32Int, Intrinsic V8F64Int> {
let isCodeGenOnly = 1 in {
def PSZr_Int : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
!strconcat(OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR512:$dst, (V16F32Int VR512:$src))]>,
EVEX, EVEX_V512;
def PSZm_Int : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
!strconcat(OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR512:$dst,
(V16F32Int (memopv16f32 addr:$src)))]>, EVEX,
EVEX_V512, EVEX_CD8<32, CD8VF>;
def PDZr_Int : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
!strconcat(OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR512:$dst, (V8F64Int VR512:$src))]>,
EVEX, EVEX_V512, VEX_W;
def PDZm_Int : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
!strconcat(OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR512:$dst,
(V8F64Int (memopv8f64 addr:$src)))]>,
EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
} // isCodeGenOnly = 1
}

/// avx512_fp_unop_s - AVX-512 unops in scalar form.
multiclass avx512_fp_unop_s<bits<8> opc, string OpcodeStr> {
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
X86MemOperand x86memop> {
let hasSideEffects = 0 in {
def SSZr : AVX5128I<opc, MRMSrcReg, (outs FR32X:$dst),
(ins FR32X:$src1, FR32X:$src2),
def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, EVEX_4V;
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
let mayLoad = 1 in {
def SSZm : AVX5128I<opc, MRMSrcMem, (outs FR32X:$dst),
(ins FR32X:$src1, f32mem:$src2),
def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, EVEX_4V, EVEX_CD8<32, CD8VT1>;
let isCodeGenOnly = 1 in
def SSZm_Int : AVX5128I<opc, MRMSrcMem, (outs VR128X:$dst),
(ins VR128X:$src1, ssmem:$src2),
!strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, EVEX_4V, EVEX_CD8<32, CD8VT1>;
}
def SDZr : AVX5128I<opc, MRMSrcReg, (outs FR64X:$dst),
(ins FR64X:$src1, FR64X:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
EVEX_4V, VEX_W;
let mayLoad = 1 in {
def SDZm : AVX5128I<opc, MRMSrcMem, (outs FR64X:$dst),
(ins FR64X:$src1, f64mem:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
let isCodeGenOnly = 1 in
def SDZm_Int : AVX5128I<opc, MRMSrcMem, (outs VR128X:$dst),
(ins VR128X:$src1, sdmem:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
}
}
}

defm VRCP14 : avx512_fp_unop_s<0x4D, "vrcp14">,
avx512_fp_unop_p<0x4C, "vrcp14", X86frcp>,
avx512_fp_unop_p_int<0x4C, "vrcp14",
int_x86_avx512_rcp14_ps_512, int_x86_avx512_rcp14_pd_512>;
defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", FR32X, f32mem>,
EVEX_CD8<32, CD8VT1>;
defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", FR64X, f64mem>,
VEX_W, EVEX_CD8<64, CD8VT1>;
defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", FR32X, f32mem>,
EVEX_CD8<32, CD8VT1>;
defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", FR64X, f64mem>,
VEX_W, EVEX_CD8<64, CD8VT1>;

defm VRSQRT14 : avx512_fp_unop_s<0x4F, "vrsqrt14">,
avx512_fp_unop_p<0x4E, "vrsqrt14", X86frsqrt>,
avx512_fp_unop_p_int<0x4E, "vrsqrt14",
int_x86_avx512_rsqrt14_ps_512, int_x86_avx512_rsqrt14_pd_512>;
def : Pat <(v4f32 (int_x86_avx512_rcp14_ss (v4f32 VR128X:$src1),
(v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))),
(COPY_TO_REGCLASS (VRCP14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
(COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;

def : Pat<(int_x86_avx512_rsqrt14_ss VR128X:$src),
(COPY_TO_REGCLASS (VRSQRT14SSZr (f32 (IMPLICIT_DEF)),
(COPY_TO_REGCLASS VR128X:$src, FR32)),
VR128X)>;
def : Pat<(int_x86_avx512_rsqrt14_ss sse_load_f32:$src),
(VRSQRT14SSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
def : Pat <(v2f64 (int_x86_avx512_rcp14_sd (v2f64 VR128X:$src1),
(v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))),
(COPY_TO_REGCLASS (VRCP14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
(COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;

def : Pat<(int_x86_avx512_rcp14_ss VR128X:$src),
(COPY_TO_REGCLASS (VRCP14SSZr (f32 (IMPLICIT_DEF)),
(COPY_TO_REGCLASS VR128X:$src, FR32)),
VR128X)>;
def : Pat<(int_x86_avx512_rcp14_ss sse_load_f32:$src),
(VRCP14SSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
def : Pat <(v4f32 (int_x86_avx512_rsqrt14_ss (v4f32 VR128X:$src1),
(v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))),
(COPY_TO_REGCLASS (VRSQRT14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
(COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;

let AddedComplexity = 20, Predicates = [HasERI] in {
defm VRCP28 : avx512_fp_unop_s<0xCB, "vrcp28">,
avx512_fp_unop_p<0xCA, "vrcp28", X86frcp>,
avx512_fp_unop_p_int<0xCA, "vrcp28",
int_x86_avx512_rcp28_ps_512, int_x86_avx512_rcp28_pd_512>;
def : Pat <(v2f64 (int_x86_avx512_rsqrt14_sd (v2f64 VR128X:$src1),
(v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))),
(COPY_TO_REGCLASS (VRSQRT14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
(COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;

defm VRSQRT28 : avx512_fp_unop_s<0xCD, "vrsqrt28">,
avx512_fp_unop_p<0xCC, "vrsqrt28", X86frsqrt>,
avx512_fp_unop_p_int<0xCC, "vrsqrt28",
int_x86_avx512_rsqrt28_ps_512, int_x86_avx512_rsqrt28_pd_512>;
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
RegisterClass RC, X86MemOperand x86memop,
PatFrag mem_frag, ValueType OpVt> {
def r : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
!strconcat(OpcodeStr,
"\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (OpVt (OpNode RC:$src)))]>,
EVEX;
def m : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (OpVt (OpNode (mem_frag addr:$src))))]>,
EVEX;
}
defm VRSQRT14PSZ : avx512_fp14_p<0x4E, "vrsqrt14ps", X86frsqrt, VR512, f512mem,
memopv16f32, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VRSQRT14PDZ : avx512_fp14_p<0x4E, "vrsqrt14pd", X86frsqrt, VR512, f512mem,
memopv8f64, v8f64>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
defm VRCP14PSZ : avx512_fp14_p<0x4C, "vrcp14ps", X86frcp, VR512, f512mem,
memopv16f32, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VRCP14PDZ : avx512_fp14_p<0x4C, "vrcp14pd", X86frcp, VR512, f512mem,
memopv8f64, v8f64>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

def : Pat <(v16f32 (int_x86_avx512_rsqrt14_ps_512 (v16f32 VR512:$src),
(bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
(VRSQRT14PSZr VR512:$src)>;
def : Pat <(v8f64 (int_x86_avx512_rsqrt14_pd_512 (v8f64 VR512:$src),
(bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
(VRSQRT14PDZr VR512:$src)>;

def : Pat <(v16f32 (int_x86_avx512_rcp14_ps_512 (v16f32 VR512:$src),
(bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
(VRCP14PSZr VR512:$src)>;
def : Pat <(v8f64 (int_x86_avx512_rcp14_pd_512 (v8f64 VR512:$src),
(bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
(VRCP14PDZr VR512:$src)>;

/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
X86MemOperand x86memop> {
let hasSideEffects = 0, Predicates = [HasERI] in {
def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
def rrb : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr,
"\t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
[]>, EVEX_4V, EVEX_B;
let mayLoad = 1 in {
def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
}
}
}

let Predicates = [HasERI] in {
def : Pat<(int_x86_avx512_rsqrt28_ss VR128X:$src),
(COPY_TO_REGCLASS (VRSQRT28SSZr (f32 (IMPLICIT_DEF)),
(COPY_TO_REGCLASS VR128X:$src, FR32)),
VR128X)>;
def : Pat<(int_x86_avx512_rsqrt28_ss sse_load_f32:$src),
(VRSQRT28SSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
defm VRCP28SS : avx512_fp28_s<0xCB, "vrcp28ss", FR32X, f32mem>,
EVEX_CD8<32, CD8VT1>;
defm VRCP28SD : avx512_fp28_s<0xCB, "vrcp28sd", FR64X, f64mem>,
VEX_W, EVEX_CD8<64, CD8VT1>;
defm VRSQRT28SS : avx512_fp28_s<0xCD, "vrsqrt28ss", FR32X, f32mem>,
EVEX_CD8<32, CD8VT1>;
defm VRSQRT28SD : avx512_fp28_s<0xCD, "vrsqrt28sd", FR64X, f64mem>,
VEX_W, EVEX_CD8<64, CD8VT1>;

def : Pat<(int_x86_avx512_rcp28_ss VR128X:$src),
(COPY_TO_REGCLASS (VRCP28SSZr (f32 (IMPLICIT_DEF)),
(COPY_TO_REGCLASS VR128X:$src, FR32)),
VR128X)>;
def : Pat<(int_x86_avx512_rcp28_ss sse_load_f32:$src),
(VRCP28SSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
def : Pat <(v4f32 (int_x86_avx512_rcp28_ss (v4f32 VR128X:$src1),
(v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1),
FROUND_NO_EXC)),
(COPY_TO_REGCLASS (VRCP28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X),
(COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;

def : Pat <(v2f64 (int_x86_avx512_rcp28_sd (v2f64 VR128X:$src1),
(v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1),
FROUND_NO_EXC)),
(COPY_TO_REGCLASS (VRCP28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X),
(COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;

def : Pat <(v4f32 (int_x86_avx512_rsqrt28_ss (v4f32 VR128X:$src1),
(v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1),
FROUND_NO_EXC)),
(COPY_TO_REGCLASS (VRSQRT28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X),
(COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;

def : Pat <(v2f64 (int_x86_avx512_rsqrt28_sd (v2f64 VR128X:$src1),
(v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1),
FROUND_NO_EXC)),
(COPY_TO_REGCLASS (VRSQRT28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X),
(COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;

/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr,
RegisterClass RC, X86MemOperand x86memop> {
let hasSideEffects = 0, Predicates = [HasERI] in {
def r : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
!strconcat(OpcodeStr,
"\t{$src, $dst|$dst, $src}"),
[]>, EVEX;
def rb : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
!strconcat(OpcodeStr,
"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
[]>, EVEX, EVEX_B;
def m : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[]>, EVEX;
}
}
defm VRSQRT28PSZ : avx512_fp28_p<0xCC, "vrsqrt28ps", VR512, f512mem>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VRSQRT28PDZ : avx512_fp28_p<0xCC, "vrsqrt28pd", VR512, f512mem>,
VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
defm VRCP28PSZ : avx512_fp28_p<0xCA, "vrcp28ps", VR512, f512mem>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VRCP28PDZ : avx512_fp28_p<0xCA, "vrcp28pd", VR512, f512mem>,
VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

def : Pat <(v16f32 (int_x86_avx512_rsqrt28_ps (v16f32 VR512:$src),
(bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_NO_EXC)),
(VRSQRT28PSZrb VR512:$src)>;
def : Pat <(v8f64 (int_x86_avx512_rsqrt28_pd (v8f64 VR512:$src),
(bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_NO_EXC)),
(VRSQRT28PDZrb VR512:$src)>;

def : Pat <(v16f32 (int_x86_avx512_rcp28_ps (v16f32 VR512:$src),
(bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_NO_EXC)),
(VRCP28PSZrb VR512:$src)>;
def : Pat <(v8f64 (int_x86_avx512_rcp28_pd (v8f64 VR512:$src),
(bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_NO_EXC)),
(VRCP28PDZrb VR512:$src)>;

multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
Intrinsic V16F32Int, Intrinsic V8F64Int,
OpndItins itins_s, OpndItins itins_d> {

@@ -3303,15 +3356,15 @@ let Predicates = [HasAVX512] in {
Requires<[OptForSize]>;

def : Pat<(f32 (X86frsqrt FR32X:$src)),
(VRSQRT14SSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
(VRSQRT14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
def : Pat<(f32 (X86frsqrt (load addr:$src))),
(VRSQRT14SSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
(VRSQRT14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>,
Requires<[OptForSize]>;

def : Pat<(f32 (X86frcp FR32X:$src)),
(VRCP14SSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
(VRCP14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
def : Pat<(f32 (X86frcp (load addr:$src))),
(VRCP14SSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
(VRCP14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>,
Requires<[OptForSize]>;

def : Pat<(int_x86_sse_sqrt_ss VR128X:$src),
@@ -150,6 +150,7 @@ class EVEX_4V : VEX_4V { bit hasEVEXPrefix = 1; }
class EVEX_K { bit hasEVEX_K = 1; }
class EVEX_KZ : EVEX_K { bit hasEVEX_Z = 1; }
class EVEX_B { bit hasEVEX_B = 1; }
class EVEX_RC { bit hasEVEX_RC = 1; }
class EVEX_V512 { bit hasEVEX_L2 = 1; bit hasVEX_L = 0; }
class EVEX_CD8<int esize, CD8VForm form> {
bits<2> EVEX_CD8E = !if(!eq(esize, 8), 0b00,

@@ -217,6 +218,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bit has3DNow0F0FOpcode =0;// Wacky 3dNow! encoding?
bit hasMemOp4Prefix = 0; // Same bit as VEX_W, but used for swapping operands
bit hasXOP_Prefix = 0; // Does this inst require an XOP prefix?
bit hasEVEX_RC = 0; // Explicitly specified rounding control in FP instruction.

// TSFlags layout should be kept in sync with X86InstrInfo.h.
let TSFlags{5-0} = FormBits;

@@ -247,6 +249,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{49} = has3DNow0F0FOpcode;
let TSFlags{50} = hasMemOp4Prefix;
let TSFlags{51} = hasXOP_Prefix;
let TSFlags{52} = hasEVEX_RC;
}

class PseudoI<dag oops, dag iops, list<dag> pattern>
@@ -56,4 +56,21 @@ define i16 @test4(i16 %a, i16 %b) {
%b1 = add i16 %a, %b
ret i16 %b1

}

; CHECK-LABEL: test5
; CHECK: ret
define float @test5(float %p) #0 {
entry:
%cmp = fcmp oeq float %p, 0.000000e+00
br i1 %cmp, label %return, label %if.end

if.end: ; preds = %entry
%cmp1 = fcmp ogt float %p, 0.000000e+00
%cond = select i1 %cmp1, float 1.000000e+00, float -1.000000e+00
br label %return

return: ; preds = %if.end, %entry
%retval.0 = phi float [ %cond, %if.end ], [ %p, %entry ]
ret float %retval.0
}
@@ -47,32 +47,32 @@ define i16 @unpckbw_test(i16 %a0, i16 %a1) {
}

define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
; CHECK: vrcp14ps
%res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1]
; CHECK: vrcp14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4c,0xc0]
%res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>) nounwind readnone
declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone

define <8 x double> @test_rcp_pd_512(<8 x double> %a0) {
; CHECK: vrcp14pd
%res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0) ; <<8 x double>> [#uses=1]
; CHECK: vrcp14pd {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x4c,0xc0]
%res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1]
ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>) nounwind readnone
declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone

define <16 x float> @test_rcp28_ps_512(<16 x float> %a0) {
; CHECK: vrcp28ps
%res = call <16 x float> @llvm.x86.avx512.rcp28.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1]
; CHECK: vrcp28ps {sae}, {{.*}}encoding: [0x62,0xf2,0x7d,0x18,0xca,0xc0]
%res = call <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8) ; <<16 x float>> [#uses=1]
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.rcp28.ps.512(<16 x float>) nounwind readnone
declare <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone

define <8 x double> @test_rcp28_pd_512(<8 x double> %a0) {
; CHECK: vrcp28pd
%res = call <8 x double> @llvm.x86.avx512.rcp28.pd.512(<8 x double> %a0) ; <<8 x double>> [#uses=1]
; CHECK: vrcp28pd {sae}, {{.*}}encoding: [0x62,0xf2,0xfd,0x18,0xca,0xc0]
%res = call <8 x double> @llvm.x86.avx512.rcp28.pd(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8) ; <<8 x double>> [#uses=1]
ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.rcp28.pd.512(<8 x double>) nounwind readnone
declare <8 x double> @llvm.x86.avx512.rcp28.pd(<8 x double>, <8 x double>, i8, i32) nounwind readnone

declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)

@@ -91,46 +91,46 @@ define <16 x float> @test8(<16 x float> %a) {
}

define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) {
; CHECK: vrsqrt14ps
%res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1]
; CHECK: vrsqrt14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4e,0xc0]
%res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>) nounwind readnone
declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone

define <16 x float> @test_rsqrt28_ps_512(<16 x float> %a0) {
; CHECK: vrsqrt28ps
%res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1]
; CHECK: vrsqrt28ps {sae}, {{.*}}encoding: [0x62,0xf2,0x7d,0x18,0xcc,0xc0]
%res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8) ; <<16 x float>> [#uses=1]
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.rsqrt28.ps.512(<16 x float>) nounwind readnone
declare <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone

define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
; CHECK: vrsqrt14ss
%res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
; CHECK: vrsqrt14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4f,0xc0]
%res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>) nounwind readnone
declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

define <4 x float> @test_rsqrt28_ss(<4 x float> %a0) {
; CHECK: vrsqrt28ss
%res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
; CHECK: vrsqrt28ss {sae}, {{.*}}encoding: [0x62,0xf2,0x7d,0x18,0xcd,0xc0]
%res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1, i32 8) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float>) nounwind readnone
declare <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone

define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
; CHECK: vrcp14ss
%res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
; CHECK: vrcp14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4d,0xc0]
%res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>) nounwind readnone
declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

define <4 x float> @test_rcp28_ss(<4 x float> %a0) {
; CHECK: vrcp28ss
%res = call <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
; CHECK: vrcp28ss {sae}, {{.*}}encoding: [0x62,0xf2,0x7d,0x18,0xcb,0xc0]
%res = call <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1, i32 8) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float>) nounwind readnone
declare <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone

define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
; CHECK: vsqrtpd
@@ -210,12 +210,13 @@ static inline bool inheritsFrom(InstructionContext child,
return inheritsFrom(child, IC_EVEX_W_XD_K) ||
inheritsFrom(child, IC_EVEX_L_W_XD_K);
case IC_EVEX_OPSIZE_K:
return inheritsFrom(child, IC_EVEX_W_OPSIZE_K) ||
inheritsFrom(child, IC_EVEX_W_OPSIZE_K);
case IC_EVEX_OPSIZE_B:
return false;
case IC_EVEX_W_K:
case IC_EVEX_W_XS_K:
case IC_EVEX_W_XD_K:
case IC_EVEX_W_OPSIZE_K:
case IC_EVEX_W_OPSIZE_B:
return false;
case IC_EVEX_L_K:
case IC_EVEX_L_XS_K: