1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 03:33:20 +01:00

[AVX512] Added VBROADCAST{SS/SD} encoding for VL subset.

Refactored through AVX512_maskable
        

llvm-svn: 220908
This commit is contained in:
Robert Khasanov 2014-10-30 14:21:47 +00:00
parent a5ac850672
commit 3e398a3800
3 changed files with 271 additions and 26 deletions

View File

@ -129,6 +129,10 @@ def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
v2i64x_info>;
def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
v4f32x_info>;
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
v2f64x_info>;
// This multiclass generates the masking variants from the non-masking
// variant. It only provides the assembly pieces for the masking variants.
@ -573,36 +577,57 @@ def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//---
multiclass avx512_fp_broadcast<bits<8> opc, string OpcodeStr,
RegisterClass DestRC,
RegisterClass SrcRC, X86MemOperand x86memop> {
def rr : AVX5128I<opc, MRMSrcReg, (outs DestRC:$dst), (ins SrcRC:$src),
!strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
[]>, EVEX;
def rm : AVX5128I<opc, MRMSrcMem, (outs DestRC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),[]>, EVEX;
// Defines the register-source (`r`) and memory-source (`m`) forms of a
// floating-point broadcast, both built through AVX512_maskable.
//   opc    - opcode byte shared by both forms.
//   OpNode - SDNode wrapped around the source in each selection pattern.
//   SrcRC  - register class of the source operand in the `r` form.
//   svt    - value type given to the source register in the `r` pattern.
//   _      - X86VectorVTInfo supplying the destination RC and VT, the Suffix
//            used to build the mnemonic, and ScalarMemOp / ScalarLdFrag for
//            the `m` form.
// NOTE(review): AVX512_maskable is defined outside this block; presumably it
// also emits the masked ({%k}) and zero-masked ({%k} {z}) variants -- confirm.
multiclass avx512_fp_broadcast<bits<8> opc, SDNode OpNode, RegisterClass SrcRC,
ValueType svt, X86VectorVTInfo _> {
// Register form. The mnemonic is "vbroadcast" followed by _.Suffix with "p"
// substituted by "s" (presumably "ps" -> "ss" and "pd" -> "sd"; Suffix values
// are not visible here).
defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins SrcRC:$src), "vbroadcast"## !subst("p", "s", _.Suffix),
"$src", "$src", (_.VT (OpNode (svt SrcRC:$src)))>,
T8PD, EVEX;
// Memory form: same mnemonic, but the source is a scalar memory operand and
// the pattern matches OpNode applied to a scalar load from addr:$src.
let mayLoad = 1 in {
defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src),
"vbroadcast"##!subst("p", "s", _.Suffix), "$src", "$src",
(_.VT (OpNode (_.ScalarLdFrag addr:$src)))>,
T8PD, EVEX;
}
}
// Instantiates avx512_fp_broadcast for the 512-bit and 256-bit destination
// widths described by an AVX512VLVectorVTInfo.
//   opc    - opcode byte forwarded to avx512_fp_broadcast.
//   OpNode - SDNode forwarded to avx512_fp_broadcast.
//   _      - AVX512VLVectorVTInfo; info512 and info256 select the destination
//            type, info128.VT is the source value type for both widths.
// Both widths take their register source from VR128X. No 128-bit destination
// variant is instantiated by this multiclass.
multiclass avx512_fp_broadcast_vl<bits<8> opc, SDNode OpNode,
AVX512VLVectorVTInfo _> {
// 512-bit destination; not guarded by the HasVLX predicate.
defm Z : avx512_fp_broadcast<opc, OpNode, VR128X, _.info128.VT, _.info512>,
EVEX_V512;
// 256-bit destination; only defined under the HasVLX predicate.
let Predicates = [HasVLX] in {
defm Z256 : avx512_fp_broadcast<opc, OpNode, VR128X, _.info128.VT, _.info256>,
EVEX_V256;
}
}
let ExeDomain = SSEPackedSingle in {
defm VBROADCASTSSZ : avx512_fp_broadcast<0x18, "vbroadcastss", VR512,
VR128X, f32mem>,
EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VBROADCASTSS : avx512_fp_broadcast_vl<0x18, X86VBroadcast,
avx512vl_f32_info>, EVEX_CD8<32, CD8VT1>;
let Predicates = [HasVLX] in {
defm VBROADCASTSSZ128 : avx512_fp_broadcast<0x18, X86VBroadcast, VR128X,
v4f32, v4f32x_info>, EVEX_V128,
EVEX_CD8<32, CD8VT1>;
}
}
let ExeDomain = SSEPackedDouble in {
defm VBROADCASTSDZ : avx512_fp_broadcast<0x19, "vbroadcastsd", VR512,
VR128X, f64mem>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VBROADCASTSD : avx512_fp_broadcast_vl<0x19, X86VBroadcast,
avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VT1>;
}
def : Pat<(v16f32 (X86VBroadcast (loadf32 addr:$src))),
(VBROADCASTSSZrm addr:$src)>;
(VBROADCASTSSZm addr:$src)>;
def : Pat<(v8f64 (X86VBroadcast (loadf64 addr:$src))),
(VBROADCASTSDZrm addr:$src)>;
(VBROADCASTSDZm addr:$src)>;
def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
(VBROADCASTSSZrm addr:$src)>;
(VBROADCASTSSZm addr:$src)>;
def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
(VBROADCASTSDZrm addr:$src)>;
(VBROADCASTSDZm addr:$src)>;
multiclass avx512_int_broadcast_reg<bits<8> opc, string OpcodeStr,
RegisterClass SrcRC, RegisterClass KRC> {
@ -711,14 +736,14 @@ def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))),
(VPBROADCASTQZrr VR128X:$src)>;
def : Pat<(v16f32 (X86VBroadcast (v4f32 VR128X:$src))),
(VBROADCASTSSZrr VR128X:$src)>;
(VBROADCASTSSZr VR128X:$src)>;
def : Pat<(v8f64 (X86VBroadcast (v2f64 VR128X:$src))),
(VBROADCASTSDZrr VR128X:$src)>;
(VBROADCASTSDZr VR128X:$src)>;
def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
(VBROADCASTSSZrr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
(VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
(VBROADCASTSDZrr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
(VBROADCASTSDZr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
def : Pat<(v16i32 (X86VBroadcast (v16i32 VR512:$src))),
(VPBROADCASTDZrr (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>;
@ -726,16 +751,16 @@ def : Pat<(v8i64 (X86VBroadcast (v8i64 VR512:$src))),
(VPBROADCASTQZrr (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>;
def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))),
(VBROADCASTSSZrr VR128X:$src)>;
(VBROADCASTSSZr VR128X:$src)>;
def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))),
(VBROADCASTSDZrr VR128X:$src)>;
(VBROADCASTSDZr VR128X:$src)>;
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v16f32 (X86VBroadcast FR32X:$src)),
(VBROADCASTSSZrr (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
(VBROADCASTSSZr (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
def : Pat<(v8f64 (X86VBroadcast FR64X:$src)),
(VBROADCASTSDZrr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
(VBROADCASTSDZr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
let Predicates = [HasAVX512] in {

View File

@ -113,6 +113,94 @@
// CHECK: encoding: [0x62,0xe1,0x14,0x58,0x58,0x92,0xfc,0xfd,0xff,0xff]
vaddps -516(%rdx){1to16}, %zmm13, %zmm18
// CHECK: vbroadcastsd (%rcx), %zmm30
// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x19,0x31]
vbroadcastsd (%rcx), %zmm30
// CHECK: vbroadcastsd (%rcx), %zmm30 {%k4}
// CHECK: encoding: [0x62,0x62,0xfd,0x4c,0x19,0x31]
vbroadcastsd (%rcx), %zmm30 {%k4}
// CHECK: vbroadcastsd (%rcx), %zmm30 {%k4} {z}
// CHECK: encoding: [0x62,0x62,0xfd,0xcc,0x19,0x31]
vbroadcastsd (%rcx), %zmm30 {%k4} {z}
// CHECK: vbroadcastsd 291(%rax,%r14,8), %zmm30
// CHECK: encoding: [0x62,0x22,0xfd,0x48,0x19,0xb4,0xf0,0x23,0x01,0x00,0x00]
vbroadcastsd 291(%rax,%r14,8), %zmm30
// CHECK: vbroadcastsd 1016(%rdx), %zmm30
// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x19,0x72,0x7f]
vbroadcastsd 1016(%rdx), %zmm30
// CHECK: vbroadcastsd 1024(%rdx), %zmm30
// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x19,0xb2,0x00,0x04,0x00,0x00]
vbroadcastsd 1024(%rdx), %zmm30
// CHECK: vbroadcastsd -1024(%rdx), %zmm30
// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x19,0x72,0x80]
vbroadcastsd -1024(%rdx), %zmm30
// CHECK: vbroadcastsd -1032(%rdx), %zmm30
// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x19,0xb2,0xf8,0xfb,0xff,0xff]
vbroadcastsd -1032(%rdx), %zmm30
// CHECK: vbroadcastsd %xmm22, %zmm21
// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0x19,0xee]
vbroadcastsd %xmm22, %zmm21
// CHECK: vbroadcastsd %xmm22, %zmm21 {%k7}
// CHECK: encoding: [0x62,0xa2,0xfd,0x4f,0x19,0xee]
vbroadcastsd %xmm22, %zmm21 {%k7}
// CHECK: vbroadcastsd %xmm22, %zmm21 {%k7} {z}
// CHECK: encoding: [0x62,0xa2,0xfd,0xcf,0x19,0xee]
vbroadcastsd %xmm22, %zmm21 {%k7} {z}
// CHECK: vbroadcastss (%rcx), %zmm3
// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x18,0x19]
vbroadcastss (%rcx), %zmm3
// CHECK: vbroadcastss (%rcx), %zmm3 {%k4}
// CHECK: encoding: [0x62,0xf2,0x7d,0x4c,0x18,0x19]
vbroadcastss (%rcx), %zmm3 {%k4}
// CHECK: vbroadcastss (%rcx), %zmm3 {%k4} {z}
// CHECK: encoding: [0x62,0xf2,0x7d,0xcc,0x18,0x19]
vbroadcastss (%rcx), %zmm3 {%k4} {z}
// CHECK: vbroadcastss 291(%rax,%r14,8), %zmm3
// CHECK: encoding: [0x62,0xb2,0x7d,0x48,0x18,0x9c,0xf0,0x23,0x01,0x00,0x00]
vbroadcastss 291(%rax,%r14,8), %zmm3
// CHECK: vbroadcastss 508(%rdx), %zmm3
// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x18,0x5a,0x7f]
vbroadcastss 508(%rdx), %zmm3
// CHECK: vbroadcastss 512(%rdx), %zmm3
// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x18,0x9a,0x00,0x02,0x00,0x00]
vbroadcastss 512(%rdx), %zmm3
// CHECK: vbroadcastss -512(%rdx), %zmm3
// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x18,0x5a,0x80]
vbroadcastss -512(%rdx), %zmm3
// CHECK: vbroadcastss -516(%rdx), %zmm3
// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x18,0x9a,0xfc,0xfd,0xff,0xff]
vbroadcastss -516(%rdx), %zmm3
// CHECK: vbroadcastss %xmm18, %zmm18
// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x18,0xd2]
vbroadcastss %xmm18, %zmm18
// CHECK: vbroadcastss %xmm18, %zmm18 {%k2}
// CHECK: encoding: [0x62,0xa2,0x7d,0x4a,0x18,0xd2]
vbroadcastss %xmm18, %zmm18 {%k2}
// CHECK: vbroadcastss %xmm18, %zmm18 {%k2} {z}
// CHECK: encoding: [0x62,0xa2,0x7d,0xca,0x18,0xd2]
vbroadcastss %xmm18, %zmm18 {%k2} {z}
// CHECK: vdivpd %zmm11, %zmm6, %zmm18
// CHECK: encoding: [0x62,0xc1,0xcd,0x48,0x5e,0xd3]
vdivpd %zmm11, %zmm6, %zmm18

View File

@ -224,6 +224,138 @@
// CHECK: encoding: [0x62,0x61,0x2c,0x30,0x58,0x8a,0xfc,0xfd,0xff,0xff]
vaddps -516(%rdx){1to8}, %ymm26, %ymm25
// CHECK: vbroadcastsd (%rcx), %ymm22
// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x19,0x31]
vbroadcastsd (%rcx), %ymm22
// CHECK: vbroadcastsd (%rcx), %ymm22 {%k5}
// CHECK: encoding: [0x62,0xe2,0xfd,0x2d,0x19,0x31]
vbroadcastsd (%rcx), %ymm22 {%k5}
// CHECK: vbroadcastsd (%rcx), %ymm22 {%k5} {z}
// CHECK: encoding: [0x62,0xe2,0xfd,0xad,0x19,0x31]
vbroadcastsd (%rcx), %ymm22 {%k5} {z}
// CHECK: vbroadcastsd 291(%rax,%r14,8), %ymm22
// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x19,0xb4,0xf0,0x23,0x01,0x00,0x00]
vbroadcastsd 291(%rax,%r14,8), %ymm22
// CHECK: vbroadcastsd 1016(%rdx), %ymm22
// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x19,0x72,0x7f]
vbroadcastsd 1016(%rdx), %ymm22
// CHECK: vbroadcastsd 1024(%rdx), %ymm22
// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x19,0xb2,0x00,0x04,0x00,0x00]
vbroadcastsd 1024(%rdx), %ymm22
// CHECK: vbroadcastsd -1024(%rdx), %ymm22
// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x19,0x72,0x80]
vbroadcastsd -1024(%rdx), %ymm22
// CHECK: vbroadcastsd -1032(%rdx), %ymm22
// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x19,0xb2,0xf8,0xfb,0xff,0xff]
vbroadcastsd -1032(%rdx), %ymm22
// CHECK: vbroadcastsd %xmm17, %ymm19
// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x19,0xd9]
vbroadcastsd %xmm17, %ymm19
// CHECK: vbroadcastsd %xmm17, %ymm19 {%k6}
// CHECK: encoding: [0x62,0xa2,0xfd,0x2e,0x19,0xd9]
vbroadcastsd %xmm17, %ymm19 {%k6}
// CHECK: vbroadcastsd %xmm17, %ymm19 {%k6} {z}
// CHECK: encoding: [0x62,0xa2,0xfd,0xae,0x19,0xd9]
vbroadcastsd %xmm17, %ymm19 {%k6} {z}
// CHECK: vbroadcastss (%rcx), %xmm21
// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x18,0x29]
vbroadcastss (%rcx), %xmm21
// CHECK: vbroadcastss (%rcx), %xmm21 {%k2}
// CHECK: encoding: [0x62,0xe2,0x7d,0x0a,0x18,0x29]
vbroadcastss (%rcx), %xmm21 {%k2}
// CHECK: vbroadcastss (%rcx), %xmm21 {%k2} {z}
// CHECK: encoding: [0x62,0xe2,0x7d,0x8a,0x18,0x29]
vbroadcastss (%rcx), %xmm21 {%k2} {z}
// CHECK: vbroadcastss 291(%rax,%r14,8), %xmm21
// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x18,0xac,0xf0,0x23,0x01,0x00,0x00]
vbroadcastss 291(%rax,%r14,8), %xmm21
// CHECK: vbroadcastss 508(%rdx), %xmm21
// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x18,0x6a,0x7f]
vbroadcastss 508(%rdx), %xmm21
// CHECK: vbroadcastss 512(%rdx), %xmm21
// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x18,0xaa,0x00,0x02,0x00,0x00]
vbroadcastss 512(%rdx), %xmm21
// CHECK: vbroadcastss -512(%rdx), %xmm21
// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x18,0x6a,0x80]
vbroadcastss -512(%rdx), %xmm21
// CHECK: vbroadcastss -516(%rdx), %xmm21
// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x18,0xaa,0xfc,0xfd,0xff,0xff]
vbroadcastss -516(%rdx), %xmm21
// CHECK: vbroadcastss (%rcx), %ymm30
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x18,0x31]
vbroadcastss (%rcx), %ymm30
// CHECK: vbroadcastss (%rcx), %ymm30 {%k1}
// CHECK: encoding: [0x62,0x62,0x7d,0x29,0x18,0x31]
vbroadcastss (%rcx), %ymm30 {%k1}
// CHECK: vbroadcastss (%rcx), %ymm30 {%k1} {z}
// CHECK: encoding: [0x62,0x62,0x7d,0xa9,0x18,0x31]
vbroadcastss (%rcx), %ymm30 {%k1} {z}
// CHECK: vbroadcastss 291(%rax,%r14,8), %ymm30
// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x18,0xb4,0xf0,0x23,0x01,0x00,0x00]
vbroadcastss 291(%rax,%r14,8), %ymm30
// CHECK: vbroadcastss 508(%rdx), %ymm30
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x18,0x72,0x7f]
vbroadcastss 508(%rdx), %ymm30
// CHECK: vbroadcastss 512(%rdx), %ymm30
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x18,0xb2,0x00,0x02,0x00,0x00]
vbroadcastss 512(%rdx), %ymm30
// CHECK: vbroadcastss -512(%rdx), %ymm30
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x18,0x72,0x80]
vbroadcastss -512(%rdx), %ymm30
// CHECK: vbroadcastss -516(%rdx), %ymm30
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x18,0xb2,0xfc,0xfd,0xff,0xff]
vbroadcastss -516(%rdx), %ymm30
// CHECK: vbroadcastss %xmm24, %xmm24
// CHECK: encoding: [0x62,0x02,0x7d,0x08,0x18,0xc0]
vbroadcastss %xmm24, %xmm24
// CHECK: vbroadcastss %xmm24, %xmm24 {%k2}
// CHECK: encoding: [0x62,0x02,0x7d,0x0a,0x18,0xc0]
vbroadcastss %xmm24, %xmm24 {%k2}
// CHECK: vbroadcastss %xmm24, %xmm24 {%k2} {z}
// CHECK: encoding: [0x62,0x02,0x7d,0x8a,0x18,0xc0]
vbroadcastss %xmm24, %xmm24 {%k2} {z}
// CHECK: vbroadcastss %xmm28, %ymm24
// CHECK: encoding: [0x62,0x02,0x7d,0x28,0x18,0xc4]
vbroadcastss %xmm28, %ymm24
// CHECK: vbroadcastss %xmm28, %ymm24 {%k6}
// CHECK: encoding: [0x62,0x02,0x7d,0x2e,0x18,0xc4]
vbroadcastss %xmm28, %ymm24 {%k6}
// CHECK: vbroadcastss %xmm28, %ymm24 {%k6} {z}
// CHECK: encoding: [0x62,0x02,0x7d,0xae,0x18,0xc4]
vbroadcastss %xmm28, %ymm24 {%k6} {z}
// CHECK: vdivpd %xmm27, %xmm18, %xmm19
// CHECK: encoding: [0x62,0x81,0xed,0x00,0x5e,0xdb]
vdivpd %xmm27, %xmm18, %xmm19