1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 04:02:41 +01:00

[X86] Add isel patterns for matching broadcast vpternlog if the ternlog and the broadcast have different types.

This commit is contained in:
Craig Topper 2020-07-10 15:15:01 -07:00
parent 45343bf20f
commit 109a838777
8 changed files with 307 additions and 26 deletions

View File

@ -11365,6 +11365,36 @@ let Predicates = [HasVLX] in {
(VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
// v16i8 X86vpternlog where one operand is a bitconverted broadcast load whose
// element type differs from the ternlog's result type. The broadcast load is
// folded into the memory operand (addr:$src3) of the rmbi instruction form,
// using the D variant for a 32-bit broadcast and the Q variant for a 64-bit one.
//
// 32-bit broadcast (v4i32 via X86VBroadcastld32) -> VPTERNLOGDZ128rmbi.
// Broadcast is the third operand: the immediate is passed through unchanged.
def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
(bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
(i8 timm:$src4))),
(VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
timm:$src4)>;
// Broadcast is the first operand: the two register operands are bound as
// $src2, $src1 and the immediate is rewritten by VPTERNLOG321_imm8.
// NOTE(review): VPTERNLOG321_imm8 is defined outside this view; presumably it
// permutes the truth table to match the operand reordering - confirm.
def : Pat<(v16i8 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
(VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
(VPTERNLOG321_imm8 timm:$src4))>;
// Broadcast is the second operand: the immediate is rewritten by
// VPTERNLOG132_imm8 (also defined outside this view - confirm its semantics).
def : Pat<(v16i8 (X86vpternlog VR128X:$src1,
(bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
VR128X:$src2, (i8 timm:$src4))),
(VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
// 64-bit broadcast (v2i64 via X86VBroadcastld64) -> VPTERNLOGQZ128rmbi.
// Same three operand positions as above: third, first, second.
def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
(bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
(i8 timm:$src4))),
(VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
timm:$src4)>;
def : Pat<(v16i8 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
(VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
(VPTERNLOG321_imm8 timm:$src4))>;
def : Pat<(v16i8 (X86vpternlog VR128X:$src1,
(bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
VR128X:$src2, (i8 timm:$src4))),
(VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
(i8 timm:$src4))),
(VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
@ -11382,6 +11412,66 @@ let Predicates = [HasVLX] in {
(VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
// v8i16 X86vpternlog where one operand is a bitconverted broadcast load of a
// different element type. The broadcast load is folded into the rmbi form's
// memory operand; the D or Q instruction is chosen by the broadcast width.
//
// 32-bit broadcast (v4i32 via X86VBroadcastld32) -> VPTERNLOGDZ128rmbi.
// Broadcast is the third operand: immediate used as-is.
def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
(bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
(i8 timm:$src4))),
(VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
timm:$src4)>;
// Broadcast is the first operand: registers are bound as $src2, $src1 and the
// immediate goes through VPTERNLOG321_imm8 (defined outside this view;
// presumably remaps the truth table for the reordering - confirm).
def : Pat<(v8i16 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
(VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
(VPTERNLOG321_imm8 timm:$src4))>;
// Broadcast is the second operand: immediate goes through VPTERNLOG132_imm8
// (defined outside this view - confirm its semantics).
def : Pat<(v8i16 (X86vpternlog VR128X:$src1,
(bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
VR128X:$src2, (i8 timm:$src4))),
(VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
// 64-bit broadcast (v2i64 via X86VBroadcastld64) -> VPTERNLOGQZ128rmbi.
// Same three operand positions: third, first, second.
def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
(bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
(i8 timm:$src4))),
(VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
timm:$src4)>;
def : Pat<(v8i16 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
(VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
(VPTERNLOG321_imm8 timm:$src4))>;
def : Pat<(v8i16 (X86vpternlog VR128X:$src1,
(bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
VR128X:$src2, (i8 timm:$src4))),
(VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
// v4i32 X86vpternlog with a bitconverted 64-bit broadcast load (v2i64 via
// X86VBroadcastld64): select VPTERNLOGQZ128rmbi so the broadcast is folded
// into the instruction's memory operand.
// Broadcast is the third operand: immediate used as-is.
def : Pat<(v4i32 (X86vpternlog VR128X:$src1, VR128X:$src2,
(bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
(i8 timm:$src4))),
(VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
timm:$src4)>;
// Broadcast is the first operand: registers are bound as $src2, $src1 and the
// immediate goes through VPTERNLOG321_imm8 (defined outside this view;
// presumably remaps the truth table for the reordering - confirm).
def : Pat<(v4i32 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
(VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
(VPTERNLOG321_imm8 timm:$src4))>;
// Broadcast is the second operand: immediate goes through VPTERNLOG132_imm8
// (defined outside this view - confirm its semantics).
def : Pat<(v4i32 (X86vpternlog VR128X:$src1,
(bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
VR128X:$src2, (i8 timm:$src4))),
(VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
// v2i64 X86vpternlog with a bitconverted 32-bit broadcast load (v4i32 via
// X86VBroadcastld32): select VPTERNLOGDZ128rmbi so the broadcast is folded
// into the instruction's memory operand.
// Broadcast is the third operand: immediate used as-is.
def : Pat<(v2i64 (X86vpternlog VR128X:$src1, VR128X:$src2,
(bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
(i8 timm:$src4))),
(VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
timm:$src4)>;
// Broadcast is the first operand: registers are bound as $src2, $src1 and the
// immediate goes through VPTERNLOG321_imm8 (defined outside this view;
// presumably remaps the truth table for the reordering - confirm).
def : Pat<(v2i64 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
(VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
(VPTERNLOG321_imm8 timm:$src4))>;
// Broadcast is the second operand: immediate goes through VPTERNLOG132_imm8
// (defined outside this view - confirm its semantics).
def : Pat<(v2i64 (X86vpternlog VR128X:$src1,
(bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
VR128X:$src2, (i8 timm:$src4))),
(VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
(i8 timm:$src4))),
(VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
@ -11399,6 +11489,36 @@ let Predicates = [HasVLX] in {
(VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
// v32i8 X86vpternlog where one operand is a bitconverted broadcast load of a
// different element type. The broadcast load is folded into the rmbi form's
// memory operand; the D or Q instruction is chosen by the broadcast width.
//
// 32-bit broadcast (v8i32 via X86VBroadcastld32) -> VPTERNLOGDZ256rmbi.
// Broadcast is the third operand: immediate used as-is.
def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
(bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
(i8 timm:$src4))),
(VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
timm:$src4)>;
// Broadcast is the first operand: registers are bound as $src2, $src1 and the
// immediate goes through VPTERNLOG321_imm8 (defined outside this view;
// presumably remaps the truth table for the reordering - confirm).
def : Pat<(v32i8 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
(VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
(VPTERNLOG321_imm8 timm:$src4))>;
// Broadcast is the second operand: immediate goes through VPTERNLOG132_imm8
// (defined outside this view - confirm its semantics).
def : Pat<(v32i8 (X86vpternlog VR256X:$src1,
(bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
VR256X:$src2, (i8 timm:$src4))),
(VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
// 64-bit broadcast (v4i64 via X86VBroadcastld64) -> VPTERNLOGQZ256rmbi.
// Same three operand positions: third, first, second.
def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
(bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
(i8 timm:$src4))),
(VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
timm:$src4)>;
def : Pat<(v32i8 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
(VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
(VPTERNLOG321_imm8 timm:$src4))>;
def : Pat<(v32i8 (X86vpternlog VR256X:$src1,
(bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
VR256X:$src2, (i8 timm:$src4))),
(VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
(i8 timm:$src4))),
(VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
@ -11415,6 +11535,66 @@ let Predicates = [HasVLX] in {
VR256X:$src2, (i8 timm:$src4))),
(VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
// v16i16 X86vpternlog where one operand is a bitconverted broadcast load of a
// different element type. The broadcast load is folded into the rmbi form's
// memory operand; the D or Q instruction is chosen by the broadcast width.
//
// 32-bit broadcast (v8i32 via X86VBroadcastld32) -> VPTERNLOGDZ256rmbi.
// Broadcast is the third operand: immediate used as-is.
def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
(bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
(i8 timm:$src4))),
(VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
timm:$src4)>;
// Broadcast is the first operand: registers are bound as $src2, $src1 and the
// immediate goes through VPTERNLOG321_imm8 (defined outside this view;
// presumably remaps the truth table for the reordering - confirm).
def : Pat<(v16i16 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
(VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
(VPTERNLOG321_imm8 timm:$src4))>;
// Broadcast is the second operand: immediate goes through VPTERNLOG132_imm8
// (defined outside this view - confirm its semantics).
def : Pat<(v16i16 (X86vpternlog VR256X:$src1,
(bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
VR256X:$src2, (i8 timm:$src4))),
(VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
// 64-bit broadcast (v4i64 via X86VBroadcastld64) -> VPTERNLOGQZ256rmbi.
// Same three operand positions: third, first, second.
def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
(bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
(i8 timm:$src4))),
(VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
timm:$src4)>;
def : Pat<(v16i16 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
(VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
(VPTERNLOG321_imm8 timm:$src4))>;
def : Pat<(v16i16 (X86vpternlog VR256X:$src1,
(bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
VR256X:$src2, (i8 timm:$src4))),
(VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
// v8i32 X86vpternlog with a bitconverted 64-bit broadcast load (v4i64 via
// X86VBroadcastld64): select VPTERNLOGQZ256rmbi so the broadcast is folded
// into the instruction's memory operand.
// Broadcast is the third operand: immediate used as-is.
def : Pat<(v8i32 (X86vpternlog VR256X:$src1, VR256X:$src2,
(bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
(i8 timm:$src4))),
(VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
timm:$src4)>;
// Broadcast is the first operand: registers are bound as $src2, $src1 and the
// immediate goes through VPTERNLOG321_imm8 (defined outside this view;
// presumably remaps the truth table for the reordering - confirm).
def : Pat<(v8i32 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
(VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
(VPTERNLOG321_imm8 timm:$src4))>;
// Broadcast is the second operand: immediate goes through VPTERNLOG132_imm8
// (defined outside this view - confirm its semantics).
def : Pat<(v8i32 (X86vpternlog VR256X:$src1,
(bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
VR256X:$src2, (i8 timm:$src4))),
(VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
// v4i64 X86vpternlog with a bitconverted 32-bit broadcast load (v8i32 via
// X86VBroadcastld32): select VPTERNLOGDZ256rmbi so the broadcast is folded
// into the instruction's memory operand.
// Broadcast is the third operand: immediate used as-is.
def : Pat<(v4i64 (X86vpternlog VR256X:$src1, VR256X:$src2,
(bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
(i8 timm:$src4))),
(VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
timm:$src4)>;
// Broadcast is the first operand: registers are bound as $src2, $src1 and the
// immediate goes through VPTERNLOG321_imm8 (defined outside this view;
// presumably remaps the truth table for the reordering - confirm).
def : Pat<(v4i64 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
(VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
(VPTERNLOG321_imm8 timm:$src4))>;
// Broadcast is the second operand: immediate goes through VPTERNLOG132_imm8
// (defined outside this view - confirm its semantics).
def : Pat<(v4i64 (X86vpternlog VR256X:$src1,
(bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
VR256X:$src2, (i8 timm:$src4))),
(VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
}
let Predicates = [HasAVX512] in {
@ -11435,6 +11615,36 @@ let Predicates = [HasAVX512] in {
(VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
// v64i8 X86vpternlog where one operand is a bitconverted broadcast load of a
// different element type. The broadcast load is folded into the rmbi form's
// memory operand; the D or Q instruction is chosen by the broadcast width.
//
// 32-bit broadcast (v16i32 via X86VBroadcastld32) -> VPTERNLOGDZrmbi.
// Broadcast is the third operand: immediate used as-is.
def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
(bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
(i8 timm:$src4))),
(VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
timm:$src4)>;
// Broadcast is the first operand: registers are bound as $src2, $src1 and the
// immediate goes through VPTERNLOG321_imm8 (defined outside this view;
// presumably remaps the truth table for the reordering - confirm).
def : Pat<(v64i8 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
VR512:$src2, VR512:$src1, (i8 timm:$src4))),
(VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
(VPTERNLOG321_imm8 timm:$src4))>;
// Broadcast is the second operand: immediate goes through VPTERNLOG132_imm8
// (defined outside this view - confirm its semantics).
def : Pat<(v64i8 (X86vpternlog VR512:$src1,
(bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
VR512:$src2, (i8 timm:$src4))),
(VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
// 64-bit broadcast (v8i64 via X86VBroadcastld64) -> VPTERNLOGQZrmbi.
// Same three operand positions: third, first, second.
def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
(bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
(i8 timm:$src4))),
(VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
timm:$src4)>;
def : Pat<(v64i8 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
VR512:$src2, VR512:$src1, (i8 timm:$src4))),
(VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
(VPTERNLOG321_imm8 timm:$src4))>;
def : Pat<(v64i8 (X86vpternlog VR512:$src1,
(bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
VR512:$src2, (i8 timm:$src4))),
(VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
(i8 timm:$src4))),
(VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
@ -11448,9 +11658,84 @@ let Predicates = [HasAVX512] in {
(VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
(VPTERNLOG321_imm8 timm:$src4))>;
def : Pat<(v32i16 (X86vpternlog VR512:$src1, (loadv32i16 addr:$src3),
VR512:$src2, (i8 timm:$src4))),
VR512:$src2, (i8 timm:$src4))),
(VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
// v32i16 X86vpternlog where one operand is a bitconverted broadcast load of a
// different element type. The broadcast load is folded into the rmbi form's
// memory operand; the D or Q instruction is chosen by the broadcast width.
// Each (result type, broadcast width) pair is listed exactly once: a repeated
// pattern could never be selected and would only add matcher-table entries.
//
// 32-bit broadcast (v16i32 via X86VBroadcastld32) -> VPTERNLOGDZrmbi.
// Broadcast is the third operand: immediate used as-is.
def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
(bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
(i8 timm:$src4))),
(VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
timm:$src4)>;
// Broadcast is the first operand: registers are bound as $src2, $src1 and the
// immediate goes through VPTERNLOG321_imm8 (defined outside this view;
// presumably remaps the truth table for the reordering - confirm).
def : Pat<(v32i16 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
VR512:$src2, VR512:$src1, (i8 timm:$src4))),
(VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
(VPTERNLOG321_imm8 timm:$src4))>;
// Broadcast is the second operand: immediate goes through VPTERNLOG132_imm8
// (defined outside this view - confirm its semantics).
def : Pat<(v32i16 (X86vpternlog VR512:$src1,
(bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
VR512:$src2, (i8 timm:$src4))),
(VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
// 64-bit broadcast (v8i64 via X86VBroadcastld64) -> VPTERNLOGQZrmbi.
// Same three operand positions: third, first, second.
def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
(bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
(i8 timm:$src4))),
(VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
timm:$src4)>;
def : Pat<(v32i16 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
VR512:$src2, VR512:$src1, (i8 timm:$src4))),
(VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
(VPTERNLOG321_imm8 timm:$src4))>;
def : Pat<(v32i16 (X86vpternlog VR512:$src1,
(bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
VR512:$src2, (i8 timm:$src4))),
(VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
// v16i32 X86vpternlog with a bitconverted 64-bit broadcast load (v8i64 via
// X86VBroadcastld64): select VPTERNLOGQZrmbi so the broadcast is folded into
// the instruction's memory operand.
// Broadcast is the third operand: immediate used as-is.
def : Pat<(v16i32 (X86vpternlog VR512:$src1, VR512:$src2,
(bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
(i8 timm:$src4))),
(VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
timm:$src4)>;
// Broadcast is the first operand: registers are bound as $src2, $src1 and the
// immediate goes through VPTERNLOG321_imm8 (defined outside this view;
// presumably remaps the truth table for the reordering - confirm).
def : Pat<(v16i32 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
VR512:$src2, VR512:$src1, (i8 timm:$src4))),
(VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
(VPTERNLOG321_imm8 timm:$src4))>;
// Broadcast is the second operand: immediate goes through VPTERNLOG132_imm8
// (defined outside this view - confirm its semantics).
def : Pat<(v16i32 (X86vpternlog VR512:$src1,
(bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
VR512:$src2, (i8 timm:$src4))),
(VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
// v8i64 X86vpternlog with a bitconverted 32-bit broadcast load (v16i32 via
// X86VBroadcastld32): select VPTERNLOGDZrmbi so the broadcast is folded into
// the instruction's memory operand.
// Broadcast is the third operand: immediate used as-is.
def : Pat<(v8i64 (X86vpternlog VR512:$src1, VR512:$src2,
(bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
(i8 timm:$src4))),
(VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
timm:$src4)>;
// Broadcast is the first operand: registers are bound as $src2, $src1 and the
// immediate goes through VPTERNLOG321_imm8 (defined outside this view;
// presumably remaps the truth table for the reordering - confirm).
def : Pat<(v8i64 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
VR512:$src2, VR512:$src1, (i8 timm:$src4))),
(VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
(VPTERNLOG321_imm8 timm:$src4))>;
// Broadcast is the second operand: immediate goes through VPTERNLOG132_imm8
// (defined outside this view - confirm its semantics).
def : Pat<(v8i64 (X86vpternlog VR512:$src1,
(bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
VR512:$src2, (i8 timm:$src4))),
(VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
}
// Patterns to implement vnot using vpternlog instead of creating all ones

View File

@ -2905,8 +2905,7 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
; AVX512VL-NEXT: vpsllvd {{.*}}(%rip), %zmm2, %zmm2
; AVX512VL-NEXT: vpord %zmm1, %zmm2, %zmm1
; AVX512VL-NEXT: vpmovdb %zmm1, %xmm1
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [18446744073709551360,18446744073709551360]
; AVX512VL-NEXT: vpternlogq $216, %xmm2, %xmm1, %xmm0
; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip){1to2}, %xmm1, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;

View File

@ -2376,8 +2376,7 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX512VL-NEXT: vpackuswb %ymm4, %ymm1, %ymm1
; AVX512VL-NEXT: vpor %ymm1, %ymm2, %ymm1
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} ymm2 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360]
; AVX512VL-NEXT: vpternlogq $216, %ymm2, %ymm1, %ymm0
; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip){1to4}, %ymm1, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: constant_funnnel_v32i8:

View File

@ -1184,8 +1184,7 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
; AVX512F-NEXT: vpackuswb %ymm5, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
; AVX512F-NEXT: vporq %zmm1, %zmm2, %zmm1
; AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm2 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360]
; AVX512F-NEXT: vpternlogq $216, %zmm2, %zmm1, %zmm0
; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip){1to8}, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: constant_funnnel_v64i8:
@ -1236,8 +1235,7 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
; AVX512VL-NEXT: vpackuswb %ymm5, %ymm1, %ymm1
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
; AVX512VL-NEXT: vporq %zmm1, %zmm2, %zmm1
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} zmm2 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360]
; AVX512VL-NEXT: vpternlogq $216, %zmm2, %zmm1, %zmm0
; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip){1to8}, %zmm1, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: constant_funnnel_v64i8:

View File

@ -2651,9 +2651,8 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VL-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
; AVX512VL-NEXT: vpord %zmm2, %zmm0, %zmm0
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm2
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm0 = [18446744073709551360,18446744073709551360]
; AVX512VL-NEXT: vpternlogq $202, %xmm1, %xmm2, %xmm0
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip){1to2}, %xmm1, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;

View File

@ -2083,9 +2083,8 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
; AVX512VL-NEXT: vpmullw {{.*}}(%rip), %ymm2, %ymm2
; AVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX512VL-NEXT: vpackuswb %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpor %ymm2, %ymm0, %ymm2
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} ymm0 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360]
; AVX512VL-NEXT: vpternlogq $202, %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: vpor %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip){1to4}, %ymm1, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: constant_funnnel_v32i8:

View File

@ -1171,9 +1171,8 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX512F-NEXT: vpackuswb %ymm4, %ymm3, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-NEXT: vporq %zmm2, %zmm0, %zmm2
; AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm0 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360]
; AVX512F-NEXT: vpternlogq $202, %zmm1, %zmm2, %zmm0
; AVX512F-NEXT: vporq %zmm2, %zmm0, %zmm0
; AVX512F-NEXT: vpternlogq $228, {{.*}}(%rip){1to8}, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: constant_funnnel_v64i8:
@ -1223,9 +1222,8 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
; AVX512VL-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX512VL-NEXT: vpackuswb %ymm4, %ymm3, %ymm3
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512VL-NEXT: vporq %zmm2, %zmm0, %zmm2
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} zmm0 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360]
; AVX512VL-NEXT: vpternlogq $202, %zmm1, %zmm2, %zmm0
; AVX512VL-NEXT: vporq %zmm2, %zmm0, %zmm0
; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip){1to8}, %zmm1, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: constant_funnnel_v64i8:

View File

@ -337,11 +337,15 @@ define <32 x i16> @test_mm512_mask_blend_epi16(<32 x i16> %A, <32 x i16> %W){
; SKX-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1}
; SKX-NEXT: ret{{[l|q]}}
;
; KNL-LABEL: test_mm512_mask_blend_epi16:
; KNL: # %bb.0: # %entry
; KNL-NEXT: vpbroadcastd {{.*#+}} zmm2 = [65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535]
; KNL-NEXT: vpternlogq $216, %zmm2, %zmm1, %zmm0
; KNL-NEXT: ret{{[l|q]}}
; KNL64-LABEL: test_mm512_mask_blend_epi16:
; KNL64: # %bb.0: # %entry
; KNL64-NEXT: vpternlogd $216, {{.*}}(%rip){1to16}, %zmm1, %zmm0
; KNL64-NEXT: retq
;
; KNL32-LABEL: test_mm512_mask_blend_epi16:
; KNL32: # %bb.0: # %entry
; KNL32-NEXT: vpternlogd $216, {{\.LCPI.*}}{1to16}, %zmm1, %zmm0
; KNL32-NEXT: retl
entry:
%0 = shufflevector <32 x i16> %A, <32 x i16> %W, <32 x i32> <i32 32, i32 1, i32 34, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 29, i32 62, i32 31>
ret <32 x i16> %0