mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
AVX-512: Added intrinsic for cvtph2ps.
Added VPTESTNM instruction. Added a pattern to vselect (lit tests will follow). llvm-svn: 200823
This commit is contained in:
parent
792771e814
commit
2e0202b75e
@ -2648,6 +2648,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||||||
def int_x86_vcvtps2ph_256 : GCCBuiltin<"__builtin_ia32_vcvtps2ph256">,
|
def int_x86_vcvtps2ph_256 : GCCBuiltin<"__builtin_ia32_vcvtps2ph256">,
|
||||||
Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty, llvm_i32_ty],
|
Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty, llvm_i32_ty],
|
||||||
[IntrNoMem]>;
|
[IntrNoMem]>;
|
||||||
|
// Masked AVX-512 half-to-single conversion intrinsic: returns v16f32 and takes,
// in order, a v16i16 source, a v16f32, an i16 and an i32. Declared IntrNoMem.
// NOTE(review): judging by the selection pattern in this change (all-zeros
// vector, i16 -1, FROUND_CURRENT) the last three operands are presumably the
// pass-through value, the write mask and the rounding/SAE control - confirm
// against the __builtin_ia32_vcvtph2ps512_mask prototype.
def int_x86_avx512_mask_vcvtph2ps_512 : GCCBuiltin<"__builtin_ia32_vcvtph2ps512_mask">,
|
||||||
|
Intrinsic<[llvm_v16f32_ty], [llvm_v16i16_ty, llvm_v16f32_ty,
|
||||||
|
llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||||
|
// Masked AVX-512 single-to-half conversion intrinsic: returns v16i16 and takes,
// in order, a v16f32 source, an i32, a v16i16 and an i16. Declared IntrNoMem.
// NOTE(review): the selection pattern in this change matches the i32 as an
// immediate and the trailing operands as all-zeros / i16 -1, so they are
// presumably the conversion-control immediate, the pass-through value and the
// write mask - confirm against the __builtin_ia32_vcvtps2ph512_mask prototype.
def int_x86_avx512_mask_vcvtps2ph_512 : GCCBuiltin<"__builtin_ia32_vcvtps2ph512_mask">,
|
||||||
|
Intrinsic<[llvm_v16i16_ty], [llvm_v16f32_ty, llvm_i32_ty,
|
||||||
|
llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
@ -2756,12 +2762,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||||||
def int_x86_avx512_cvtusi642sd : GCCBuiltin<"__builtin_ia32_cvtusi642sd">,
|
def int_x86_avx512_cvtusi642sd : GCCBuiltin<"__builtin_ia32_cvtusi642sd">,
|
||||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
|
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
|
||||||
llvm_i64_ty], [IntrNoMem]>;
|
llvm_i64_ty], [IntrNoMem]>;
|
||||||
|
|
||||||
def int_x86_avx512_vcvtph2ps_512 : GCCBuiltin<"__builtin_ia32_vcvtph2ps512">,
|
|
||||||
Intrinsic<[llvm_v16f32_ty], [llvm_v16i16_ty], [IntrNoMem]>;
|
|
||||||
def int_x86_avx512_vcvtps2ph_512 : GCCBuiltin<"__builtin_ia32_vcvtps2ph512">,
|
|
||||||
Intrinsic<[llvm_v16i16_ty], [llvm_v16f32_ty, llvm_i32_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Vector convert
|
// Vector convert
|
||||||
|
@ -312,8 +312,9 @@ namespace llvm {
|
|||||||
// TESTP - Vector packed fp sign bitwise comparisons.
|
// TESTP - Vector packed fp sign bitwise comparisons.
|
||||||
TESTP,
|
TESTP,
|
||||||
|
|
||||||
// TESTM - Vector "test" in AVX-512, the result is in a mask vector.
|
// TESTM, TESTNM - Vector "test" in AVX-512, the result is in a mask vector.
|
||||||
TESTM,
|
TESTM,
|
||||||
|
TESTNM,
|
||||||
|
|
||||||
// OR/AND test for masks
|
// OR/AND test for masks
|
||||||
KORTEST,
|
KORTEST,
|
||||||
|
@ -613,13 +613,13 @@ defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32, i512me
|
|||||||
defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64, i512mem,
|
defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64, i512mem,
|
||||||
X86VPermiv3, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
|
X86VPermiv3, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||||
|
|
||||||
defm VPERM2D : avx512_perm_3src<0x7E, "vperm2d", VR512, memopv16i32, i512mem,
|
defm VPERMT2D : avx512_perm_3src<0x7E, "vpermt2d", VR512, memopv16i32, i512mem,
|
||||||
X86VPermv3, v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
|
X86VPermv3, v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
|
||||||
defm VPERM2Q : avx512_perm_3src<0x7E, "vperm2q", VR512, memopv8i64, i512mem,
|
defm VPERMT2Q : avx512_perm_3src<0x7E, "vpermt2q", VR512, memopv8i64, i512mem,
|
||||||
X86VPermv3, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
|
X86VPermv3, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||||
defm VPERM2PS : avx512_perm_3src<0x7F, "vperm2ps", VR512, memopv16f32, i512mem,
|
defm VPERMT2PS : avx512_perm_3src<0x7F, "vpermt2ps", VR512, memopv16f32, i512mem,
|
||||||
X86VPermv3, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
|
X86VPermv3, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
|
||||||
defm VPERM2PD : avx512_perm_3src<0x7F, "vperm2pd", VR512, memopv8f64, i512mem,
|
defm VPERMT2PD : avx512_perm_3src<0x7F, "vpermt2pd", VR512, memopv8f64, i512mem,
|
||||||
X86VPermv3, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
|
X86VPermv3, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// AVX-512 - BLEND using mask
|
// AVX-512 - BLEND using mask
|
||||||
@ -1332,6 +1332,11 @@ let Constraints = "$src1 = $dst" in {
|
|||||||
" \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
|
" \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
|
||||||
[]>, EVEX, EVEX_K;
|
[]>, EVEX, EVEX_K;
|
||||||
}
|
}
|
||||||
|
// Register-to-register form taking a mask register and a single source; the
// assembly string prints the mask followed by "{z}" and the record is tagged
// EVEX_KZ. It carries no selection pattern of its own ([]), so it is only
// reachable through explicit Pat<> records.
// NOTE(review): presumably the zero-masking variant of the move (lanes with a
// clear mask bit become zero) - the vselect-with-zeros patterns in this change
// rely on that reading.
def rrkz : AVX512XSI<load_opc, MRMSrcReg, (outs RC:$dst),
|
||||||
|
(ins KRC:$mask, RC:$src),
|
||||||
|
!strconcat(asm,
|
||||||
|
" \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"), []>,
|
||||||
|
EVEX, EVEX_KZ;
|
||||||
}
|
}
|
||||||
|
|
||||||
defm VMOVDQU32 : avx512_mov_int<0x6F, 0x7F, "vmovdqu32", VR512, VK16WM,
|
defm VMOVDQU32 : avx512_mov_int<0x6F, 0x7F, "vmovdqu32", VR512, VK16WM,
|
||||||
@ -1351,6 +1356,23 @@ def : Pat<(store (v16i32 VR512:$src), addr:$dst),
|
|||||||
(VMOVDQU32mr addr:$dst, VR512:$src)>;
|
(VMOVDQU32mr addr:$dst, VR512:$src)>;
|
||||||
|
|
||||||
let AddedComplexity = 20 in {
|
let AddedComplexity = 20 in {
|
||||||
|
// v8i64 select between $src (mask bit set) and an all-zeros vector (mask bit
// clear): emitted as a single VMOVDQU64rrkz using the mask unchanged.
def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src),
|
||||||
|
(bc_v8i64 (v16i32 immAllZerosV)))),
|
||||||
|
(VMOVDQU64rrkz VK8WM:$mask, VR512:$src)>;
|
||||||
|
|
||||||
|
// Same select with the arms swapped (zeros where the mask bit is set, $src
// elsewhere): the mask is inverted first and then fed to VMOVDQU64rrkz.
// The inversion uses KNOTWrr, so the mask is copied to VK16 for the NOT and
// the result is copied back to VK8.
def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
|
||||||
|
(v8i64 VR512:$src))),
|
||||||
|
(VMOVDQU64rrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
|
||||||
|
VK8), VR512:$src)>;
|
||||||
|
|
||||||
|
// v16i32 select between $src (mask bit set) and an all-zeros vector (mask bit
// clear): emitted as a single VMOVDQU32rrkz using the mask unchanged.
def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 VR512:$src),
|
||||||
|
(v16i32 immAllZerosV))),
|
||||||
|
(VMOVDQU32rrkz VK16WM:$mask, VR512:$src)>;
|
||||||
|
|
||||||
|
// Same select with the arms swapped (zeros where the mask bit is set, $src
// elsewhere): the 16-bit mask is inverted with KNOTWrr and then used by
// VMOVDQU32rrkz.
def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
|
||||||
|
(v16i32 VR512:$src))),
|
||||||
|
(VMOVDQU32rrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
|
||||||
|
|
||||||
def : Pat<(v16f32 (vselect VK16WM:$mask, (v16f32 VR512:$src1),
|
def : Pat<(v16f32 (vselect VK16WM:$mask, (v16f32 VR512:$src1),
|
||||||
(v16f32 VR512:$src2))),
|
(v16f32 VR512:$src2))),
|
||||||
(VMOVUPSZrrk VR512:$src2, VK16WM:$mask, VR512:$src1)>;
|
(VMOVUPSZrrk VR512:$src2, VK16WM:$mask, VR512:$src1)>;
|
||||||
@ -2118,24 +2140,34 @@ def : Pat<(v8f64 (int_x86_avx512_mask_min_pd_512 (v8f64 VR512:$src1),
|
|||||||
multiclass avx512_vptest<bits<8> opc, string OpcodeStr, RegisterClass KRC,
|
multiclass avx512_vptest<bits<8> opc, string OpcodeStr, RegisterClass KRC,
|
||||||
RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
|
RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
|
||||||
SDNode OpNode, ValueType vt> {
|
SDNode OpNode, ValueType vt> {
|
||||||
def rr : AVX5128I<opc, MRMSrcReg,
|
def rr : AVX512PI<opc, MRMSrcReg,
|
||||||
(outs KRC:$dst), (ins RC:$src1, RC:$src2),
|
(outs KRC:$dst), (ins RC:$src1, RC:$src2),
|
||||||
!strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
!strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||||
[(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))]>, EVEX_4V;
|
[(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))],
|
||||||
def rm : AVX5128I<opc, MRMSrcMem,
|
SSEPackedInt>, EVEX_4V;
|
||||||
|
def rm : AVX512PI<opc, MRMSrcMem,
|
||||||
(outs KRC:$dst), (ins RC:$src1, x86memop:$src2),
|
(outs KRC:$dst), (ins RC:$src1, x86memop:$src2),
|
||||||
!strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
!strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||||
[(set KRC:$dst, (OpNode (vt RC:$src1),
|
[(set KRC:$dst, (OpNode (vt RC:$src1),
|
||||||
(bitconvert (memop_frag addr:$src2))))]>, EVEX_4V;
|
(bitconvert (memop_frag addr:$src2))))], SSEPackedInt>, EVEX_4V;
|
||||||
}
|
}
|
||||||
|
|
||||||
defm VPTESTMDZ : avx512_vptest<0x27, "vptestmd", VK16, VR512, f512mem,
|
defm VPTESTMDZ : avx512_vptest<0x27, "vptestmd", VK16, VR512, f512mem,
|
||||||
memopv16i32, X86testm, v16i32>, EVEX_V512,
|
memopv16i32, X86testm, v16i32>, T8PD, EVEX_V512,
|
||||||
EVEX_CD8<32, CD8VF>;
|
EVEX_CD8<32, CD8VF>;
|
||||||
defm VPTESTMQZ : avx512_vptest<0x27, "vptestmq", VK8, VR512, f512mem,
|
// 512-bit VPTESTMQ: per-qword "test" producing a VK8 mask via X86testm.
// The instruction is encoded with the 66 mandatory prefix in the 0F38 map
// (EVEX.66.0F38.W1 27 /r), i.e. T8PD - the same prefix class VPTESTMDZ uses.
// T8XS (F3 prefix) would emit the encoding of VPTESTNMQ instead.
defm VPTESTMQZ : avx512_vptest<0x27, "vptestmq", VK8, VR512, f512mem,
|
||||||
memopv8i64, X86testm, v8i64>, EVEX_V512, VEX_W,
|
memopv8i64, X86testm, v8i64>, T8PD, EVEX_V512, VEX_W,
|
||||||
EVEX_CD8<64, CD8VF>;
|
EVEX_CD8<64, CD8VF>;
|
||||||
|
|
||||||
|
// 512-bit VPTESTNMD / VPTESTNMQ: per-element "test-not" producing a mask via
// X86testnm. Both share opcode 0x27 with VPTESTM and are distinguished from it
// by the F3 mandatory prefix in the 0F38 map (T8XS); the Q form additionally
// sets VEX_W. Using T8PD on the Q form would emit the VPTESTMQ encoding.
// NOTE(review): current Intel documentation lists VPTESTNMD/Q under AVX512F
// rather than AVX512CD - confirm whether the HasCDI predicate is still wanted.
let Predicates = [HasCDI] in {
|
||||||
|
defm VPTESTNMDZ : avx512_vptest<0x27, "vptestnmd", VK16, VR512, f512mem,
|
||||||
|
memopv16i32, X86testnm, v16i32>, T8XS, EVEX_V512,
|
||||||
|
EVEX_CD8<32, CD8VF>;
|
||||||
|
defm VPTESTNMQZ : avx512_vptest<0x27, "vptestnmq", VK8, VR512, f512mem,
|
||||||
|
memopv8i64, X86testnm, v8i64>, T8XS, EVEX_V512, VEX_W,
|
||||||
|
EVEX_CD8<64, CD8VF>;
|
||||||
|
}
|
||||||
|
|
||||||
def : Pat <(i16 (int_x86_avx512_mask_ptestm_d_512 (v16i32 VR512:$src1),
|
def : Pat <(i16 (int_x86_avx512_mask_ptestm_d_512 (v16i32 VR512:$src1),
|
||||||
(v16i32 VR512:$src2), (i16 -1))),
|
(v16i32 VR512:$src2), (i16 -1))),
|
||||||
(COPY_TO_REGCLASS (VPTESTMDZrr VR512:$src1, VR512:$src2), GR16)>;
|
(COPY_TO_REGCLASS (VPTESTMDZrr VR512:$src1, VR512:$src2), GR16)>;
|
||||||
@ -2997,35 +3029,41 @@ let Predicates = [HasAVX512] in {
|
|||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Half precision conversion instructions
|
// Half precision conversion instructions
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
multiclass avx512_f16c_ph2ps<RegisterClass destRC, RegisterClass srcRC,
|
multiclass avx512_cvtph2ps<RegisterClass destRC, RegisterClass srcRC,
|
||||||
X86MemOperand x86memop, Intrinsic Int> {
|
X86MemOperand x86memop> {
|
||||||
def rr : AVX5128I<0x13, MRMSrcReg, (outs destRC:$dst), (ins srcRC:$src),
|
def rr : AVX5128I<0x13, MRMSrcReg, (outs destRC:$dst), (ins srcRC:$src),
|
||||||
"vcvtph2ps\t{$src, $dst|$dst, $src}",
|
"vcvtph2ps\t{$src, $dst|$dst, $src}",
|
||||||
[(set destRC:$dst, (Int srcRC:$src))]>, EVEX;
|
[]>, EVEX;
|
||||||
let hasSideEffects = 0, mayLoad = 1 in
|
let hasSideEffects = 0, mayLoad = 1 in
|
||||||
def rm : AVX5128I<0x13, MRMSrcMem, (outs destRC:$dst), (ins x86memop:$src),
|
def rm : AVX5128I<0x13, MRMSrcMem, (outs destRC:$dst), (ins x86memop:$src),
|
||||||
"vcvtph2ps\t{$src, $dst|$dst, $src}", []>, EVEX;
|
"vcvtph2ps\t{$src, $dst|$dst, $src}", []>, EVEX;
|
||||||
}
|
}
|
||||||
|
|
||||||
multiclass avx512_f16c_ps2ph<RegisterClass destRC, RegisterClass srcRC,
|
multiclass avx512_cvtps2ph<RegisterClass destRC, RegisterClass srcRC,
|
||||||
X86MemOperand x86memop, Intrinsic Int> {
|
X86MemOperand x86memop> {
|
||||||
def rr : AVX512AIi8<0x1D, MRMDestReg, (outs destRC:$dst),
|
def rr : AVX512AIi8<0x1D, MRMDestReg, (outs destRC:$dst),
|
||||||
(ins srcRC:$src1, i32i8imm:$src2),
|
(ins srcRC:$src1, i32i8imm:$src2),
|
||||||
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
"vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||||
[(set destRC:$dst, (Int srcRC:$src1, imm:$src2))]>, EVEX;
|
[]>, EVEX;
|
||||||
let hasSideEffects = 0, mayStore = 1 in
|
let hasSideEffects = 0, mayStore = 1 in
|
||||||
def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
|
def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
|
||||||
(ins x86memop:$dst, srcRC:$src1, i32i8imm:$src2),
|
(ins x86memop:$dst, srcRC:$src1, i32i8imm:$src2),
|
||||||
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX;
|
"vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX;
|
||||||
}
|
}
|
||||||
|
|
||||||
defm VCVTPH2PSZ : avx512_f16c_ph2ps<VR512, VR256X, f256mem,
|
defm VCVTPH2PSZ : avx512_cvtph2ps<VR512, VR256X, f256mem>, EVEX_V512,
|
||||||
int_x86_avx512_vcvtph2ps_512>, EVEX_V512,
|
|
||||||
EVEX_CD8<32, CD8VH>;
|
EVEX_CD8<32, CD8VH>;
|
||||||
defm VCVTPS2PHZ : avx512_f16c_ps2ph<VR256X, VR512, f256mem,
|
defm VCVTPS2PHZ : avx512_cvtps2ph<VR256X, VR512, f256mem>, EVEX_V512,
|
||||||
int_x86_avx512_vcvtps2ph_512>, EVEX_V512,
|
|
||||||
EVEX_CD8<32, CD8VH>;
|
EVEX_CD8<32, CD8VH>;
|
||||||
|
|
||||||
|
// Lowers int_x86_avx512_mask_vcvtps2ph_512 to the plain register form, but only
// for the call shape matched here: an all-zeros v16i16 third operand and an
// i16 -1 last operand. The immediate $rc is forwarded to the instruction.
// Other operand combinations are not covered by this pattern.
def : Pat<(v16i16 (int_x86_avx512_mask_vcvtps2ph_512 (v16f32 VR512:$src),
|
||||||
|
imm:$rc, (bc_v16i16(v8i32 immAllZerosV)), (i16 -1))),
|
||||||
|
(VCVTPS2PHZrr VR512:$src, imm:$rc)>;
|
||||||
|
|
||||||
|
// Lowers int_x86_avx512_mask_vcvtph2ps_512 to the plain register form, but only
// for the call shape matched here: an all-zeros v16f32 second operand, an
// i16 -1 third operand and FROUND_CURRENT as the last operand.
// Other operand combinations are not covered by this pattern.
def : Pat<(v16f32 (int_x86_avx512_mask_vcvtph2ps_512 (v16i16 VR256X:$src),
|
||||||
|
(bc_v16f32(v16i32 immAllZerosV)), (i16 -1), (i32 FROUND_CURRENT))),
|
||||||
|
(VCVTPH2PSZrr VR256X:$src)>;
|
||||||
|
|
||||||
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
|
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
|
||||||
defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
|
defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
|
||||||
"ucomiss">, TB, EVEX, VEX_LIG,
|
"ucomiss">, TB, EVEX, VEX_LIG,
|
||||||
|
@ -177,6 +177,9 @@ def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>;
|
|||||||
def X86testm : SDNode<"X86ISD::TESTM", SDTypeProfile<1, 2, [SDTCisVec<0>,
|
def X86testm : SDNode<"X86ISD::TESTM", SDTypeProfile<1, 2, [SDTCisVec<0>,
|
||||||
SDTCisVec<1>,
|
SDTCisVec<1>,
|
||||||
SDTCisSameAs<2, 1>]>>;
|
SDTCisSameAs<2, 1>]>>;
|
||||||
|
// DAG node for X86ISD::TESTNM. Type profile: one result and two operands; the
// result and the first operand must be vectors, and the second operand must
// have the same type as the first. This is the same profile X86testm uses.
def X86testnm : SDNode<"X86ISD::TESTNM", SDTypeProfile<1, 2, [SDTCisVec<0>,
|
||||||
|
SDTCisVec<1>,
|
||||||
|
SDTCisSameAs<2, 1>]>>;
|
||||||
def X86select : SDNode<"X86ISD::SELECT" , SDTSelect>;
|
def X86select : SDNode<"X86ISD::SELECT" , SDTSelect>;
|
||||||
|
|
||||||
def X86pmuludq : SDNode<"X86ISD::PMULUDQ",
|
def X86pmuludq : SDNode<"X86ISD::PMULUDQ",
|
||||||
|
@ -220,19 +220,20 @@ define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
|
|||||||
declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone
|
declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone
|
||||||
|
|
||||||
define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
|
define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
|
||||||
; CHECK: vcvtph2ps
|
; CHECK: vcvtph2ps %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x13,0xc0]
|
||||||
%res = call <16 x float> @llvm.x86.avx512.vcvtph2ps.512(<16 x i16> %a0)
|
%res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
|
||||||
ret <16 x float> %res
|
ret <16 x float> %res
|
||||||
}
|
}
|
||||||
declare <16 x float> @llvm.x86.avx512.vcvtph2ps.512(<16 x i16>) nounwind readonly
|
declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly
|
||||||
|
|
||||||
|
|
||||||
define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0) {
|
define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0) {
|
||||||
; CHECK: vcvtps2ph
|
; CHECK: vcvtps2ph $2, %zmm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1d,0xc0,0x02]
|
||||||
%res = call <16 x i16> @llvm.x86.avx512.vcvtps2ph.512(<16 x float> %a0, i32 0)
|
%res = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1)
|
||||||
ret <16 x i16> %res
|
ret <16 x i16> %res
|
||||||
}
|
}
|
||||||
declare <16 x i16> @llvm.x86.avx512.vcvtps2ph.512(<16 x float>, i32) nounwind readonly
|
|
||||||
|
declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly
|
||||||
|
|
||||||
define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) {
|
define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) {
|
||||||
; CHECK: vbroadcastss
|
; CHECK: vbroadcastss
|
||||||
|
@ -49,7 +49,7 @@ define <8 x double> @test4(<8 x double> %a) nounwind {
|
|||||||
}
|
}
|
||||||
|
|
||||||
; CHECK-LABEL: test5:
|
; CHECK-LABEL: test5:
|
||||||
; CHECK: vperm2pd
|
; CHECK: vpermt2pd
|
||||||
; CHECK: ret
|
; CHECK: ret
|
||||||
define <8 x double> @test5(<8 x double> %a, <8 x double> %b) nounwind {
|
define <8 x double> @test5(<8 x double> %a, <8 x double> %b) nounwind {
|
||||||
%c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
|
%c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
|
||||||
@ -65,7 +65,7 @@ define <8 x i64> @test6(<8 x i64> %a) nounwind {
|
|||||||
}
|
}
|
||||||
|
|
||||||
; CHECK-LABEL: test7:
|
; CHECK-LABEL: test7:
|
||||||
; CHECK: vperm2q
|
; CHECK: vpermt2q
|
||||||
; CHECK: ret
|
; CHECK: ret
|
||||||
define <8 x i64> @test7(<8 x i64> %a, <8 x i64> %b) nounwind {
|
define <8 x i64> @test7(<8 x i64> %a, <8 x i64> %b) nounwind {
|
||||||
%c = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
|
%c = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
|
||||||
@ -73,7 +73,7 @@ define <8 x i64> @test7(<8 x i64> %a, <8 x i64> %b) nounwind {
|
|||||||
}
|
}
|
||||||
|
|
||||||
; CHECK-LABEL: test8:
|
; CHECK-LABEL: test8:
|
||||||
; CHECK: vperm2d
|
; CHECK: vpermt2d
|
||||||
; CHECK: ret
|
; CHECK: ret
|
||||||
define <16 x i32> @test8(<16 x i32> %a, <16 x i32> %b) nounwind {
|
define <16 x i32> @test8(<16 x i32> %a, <16 x i32> %b) nounwind {
|
||||||
%c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
|
%c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
|
||||||
@ -81,7 +81,7 @@ define <16 x i32> @test8(<16 x i32> %a, <16 x i32> %b) nounwind {
|
|||||||
}
|
}
|
||||||
|
|
||||||
; CHECK-LABEL: test9:
|
; CHECK-LABEL: test9:
|
||||||
; CHECK: vperm2ps
|
; CHECK: vpermt2ps
|
||||||
; CHECK: ret
|
; CHECK: ret
|
||||||
define <16 x float> @test9(<16 x float> %a, <16 x float> %b) nounwind {
|
define <16 x float> @test9(<16 x float> %a, <16 x float> %b) nounwind {
|
||||||
%c = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
|
%c = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
|
||||||
@ -89,7 +89,7 @@ define <16 x float> @test9(<16 x float> %a, <16 x float> %b) nounwind {
|
|||||||
}
|
}
|
||||||
|
|
||||||
; CHECK-LABEL: test10:
|
; CHECK-LABEL: test10:
|
||||||
; CHECK: vperm2ps (
|
; CHECK: vpermt2ps (
|
||||||
; CHECK: ret
|
; CHECK: ret
|
||||||
define <16 x float> @test10(<16 x float> %a, <16 x float>* %b) nounwind {
|
define <16 x float> @test10(<16 x float> %a, <16 x float>* %b) nounwind {
|
||||||
%c = load <16 x float>* %b
|
%c = load <16 x float>* %b
|
||||||
@ -98,7 +98,7 @@ define <16 x float> @test10(<16 x float> %a, <16 x float>* %b) nounwind {
|
|||||||
}
|
}
|
||||||
|
|
||||||
; CHECK-LABEL: test11:
|
; CHECK-LABEL: test11:
|
||||||
; CHECK: vperm2d
|
; CHECK: vpermt2d
|
||||||
; CHECK: ret
|
; CHECK: ret
|
||||||
define <16 x i32> @test11(<16 x i32> %a, <16 x i32>* %b) nounwind {
|
define <16 x i32> @test11(<16 x i32> %a, <16 x i32>* %b) nounwind {
|
||||||
%c = load <16 x i32>* %b
|
%c = load <16 x i32>* %b
|
||||||
@ -202,7 +202,7 @@ define <16 x float> @test23(<16 x float> %a, <16 x float> %c) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
; CHECK-LABEL: @test24
|
; CHECK-LABEL: @test24
|
||||||
; CHECK: vperm2d
|
; CHECK: vpermt2d
|
||||||
; CHECK: ret
|
; CHECK: ret
|
||||||
define <16 x i32> @test24(<16 x i32> %a, <16 x i32> %b) nounwind {
|
define <16 x i32> @test24(<16 x i32> %a, <16 x i32> %b) nounwind {
|
||||||
%c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
%c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||||
|
Loading…
Reference in New Issue
Block a user