mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[AVX-512] Replace masked 16-bit element variable shift intrinsics with new unmasked versions and selects.
The same thing was done to 32-bit and 64-bit element sizes previously. This will allow us to support these shuffls in InstCombineCalls along with the other variable shift intrinsics. llvm-svn: 287312
This commit is contained in:
parent
cf561a0fd0
commit
8578a62eac
@ -1844,16 +1844,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_psllv16_hi : GCCBuiltin<"__builtin_ia32_psllv16hi_mask">,
|
||||
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
|
||||
llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psllv32hi : GCCBuiltin<"__builtin_ia32_psllv32hi_mask">,
|
||||
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
|
||||
llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psllv8_hi : GCCBuiltin<"__builtin_ia32_psllv8hi_mask">,
|
||||
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
|
||||
llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_pmultishift_qb_128:
|
||||
GCCBuiltin<"__builtin_ia32_vpmultishiftqb128_mask">,
|
||||
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
|
||||
@ -2320,25 +2310,35 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_psrav16_hi : GCCBuiltin<"__builtin_ia32_psrav16hi_mask">,
|
||||
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
|
||||
llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psrav32_hi : GCCBuiltin<"__builtin_ia32_psrav32hi_mask">,
|
||||
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
|
||||
llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psrav8_hi : GCCBuiltin<"__builtin_ia32_psrav8hi_mask">,
|
||||
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
|
||||
llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_psllv_w_128 : GCCBuiltin<"__builtin_ia32_psllv8hi">,
|
||||
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_psllv_w_256 : GCCBuiltin<"__builtin_ia32_psllv16hi">,
|
||||
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_psllv_w_512 : GCCBuiltin<"__builtin_ia32_psllv32hi">,
|
||||
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_psrlv16_hi : GCCBuiltin<"__builtin_ia32_psrlv16hi_mask">,
|
||||
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
|
||||
llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psrlv32hi : GCCBuiltin<"__builtin_ia32_psrlv32hi_mask">,
|
||||
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
|
||||
llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psrlv8_hi : GCCBuiltin<"__builtin_ia32_psrlv8hi_mask">,
|
||||
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
|
||||
llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_psrlv_w_128 : GCCBuiltin<"__builtin_ia32_psrlv8hi">,
|
||||
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_psrlv_w_256 : GCCBuiltin<"__builtin_ia32_psrlv16hi">,
|
||||
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_psrlv_w_512 : GCCBuiltin<"__builtin_ia32_psrlv32hi">,
|
||||
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_psrav_w_128 : GCCBuiltin<"__builtin_ia32_psrav8hi">,
|
||||
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_psrav_w_256 : GCCBuiltin<"__builtin_ia32_psrav16hi">,
|
||||
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_psrav_w_512 : GCCBuiltin<"__builtin_ia32_psrav32hi">,
|
||||
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_prorv_d_128 : GCCBuiltin<"__builtin_ia32_prorvd128_mask">,
|
||||
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
|
||||
|
@ -325,19 +325,9 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
||||
Name.startswith("avx512.mask.pslli") || // Added in 4.0
|
||||
Name.startswith("avx512.mask.psrai") || // Added in 4.0
|
||||
Name.startswith("avx512.mask.psrli") || // Added in 4.0
|
||||
Name == "avx512.mask.psllv2.di" || // Added in 4.0
|
||||
Name == "avx512.mask.psllv4.di" || // Added in 4.0
|
||||
Name == "avx512.mask.psllv4.si" || // Added in 4.0
|
||||
Name == "avx512.mask.psllv8.si" || // Added in 4.0
|
||||
Name == "avx512.mask.psrav4.si" || // Added in 4.0
|
||||
Name == "avx512.mask.psrav8.si" || // Added in 4.0
|
||||
Name == "avx512.mask.psrlv2.di" || // Added in 4.0
|
||||
Name == "avx512.mask.psrlv4.di" || // Added in 4.0
|
||||
Name == "avx512.mask.psrlv4.si" || // Added in 4.0
|
||||
Name == "avx512.mask.psrlv8.si" || // Added in 4.0
|
||||
Name.startswith("avx512.mask.psllv.") || // Added in 4.0
|
||||
Name.startswith("avx512.mask.psrav.") || // Added in 4.0
|
||||
Name.startswith("avx512.mask.psrlv.") || // Added in 4.0
|
||||
Name.startswith("avx512.mask.psllv") || // Added in 4.0
|
||||
Name.startswith("avx512.mask.psrav") || // Added in 4.0
|
||||
Name.startswith("avx512.mask.psrlv") || // Added in 4.0
|
||||
Name.startswith("sse41.pmovsx") || // Added in 3.8
|
||||
Name.startswith("sse41.pmovzx") || // Added in 3.9
|
||||
Name.startswith("avx2.pmovsx") || // Added in 3.9
|
||||
@ -1439,7 +1429,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||
bool IsVariable = Name[16] == 'v';
|
||||
char Size = Name[16] == '.' ? Name[17] :
|
||||
Name[17] == '.' ? Name[18] :
|
||||
Name[19];
|
||||
Name[18] == '.' ? Name[19] :
|
||||
Name[20];
|
||||
|
||||
Intrinsic::ID IID;
|
||||
if (IsVariable && Name[17] != '.') {
|
||||
@ -1451,6 +1442,12 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||
IID = Intrinsic::x86_avx2_psllv_d;
|
||||
else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
|
||||
IID = Intrinsic::x86_avx2_psllv_d_256;
|
||||
else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
|
||||
IID = Intrinsic::x86_avx512_psllv_w_128;
|
||||
else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
|
||||
IID = Intrinsic::x86_avx512_psllv_w_256;
|
||||
else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
|
||||
IID = Intrinsic::x86_avx512_psllv_w_512;
|
||||
else
|
||||
llvm_unreachable("Unexpected size");
|
||||
} else if (Name.endswith(".128")) {
|
||||
@ -1500,7 +1497,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||
bool IsVariable = Name[16] == 'v';
|
||||
char Size = Name[16] == '.' ? Name[17] :
|
||||
Name[17] == '.' ? Name[18] :
|
||||
Name[19];
|
||||
Name[18] == '.' ? Name[19] :
|
||||
Name[20];
|
||||
|
||||
Intrinsic::ID IID;
|
||||
if (IsVariable && Name[17] != '.') {
|
||||
@ -1512,6 +1510,12 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||
IID = Intrinsic::x86_avx2_psrlv_d;
|
||||
else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
|
||||
IID = Intrinsic::x86_avx2_psrlv_d_256;
|
||||
else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
|
||||
IID = Intrinsic::x86_avx512_psrlv_w_128;
|
||||
else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
|
||||
IID = Intrinsic::x86_avx512_psrlv_w_256;
|
||||
else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
|
||||
IID = Intrinsic::x86_avx512_psrlv_w_512;
|
||||
else
|
||||
llvm_unreachable("Unexpected size");
|
||||
} else if (Name.endswith(".128")) {
|
||||
@ -1561,7 +1565,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||
bool IsVariable = Name[16] == 'v';
|
||||
char Size = Name[16] == '.' ? Name[17] :
|
||||
Name[17] == '.' ? Name[18] :
|
||||
Name[19];
|
||||
Name[18] == '.' ? Name[19] :
|
||||
Name[20];
|
||||
|
||||
Intrinsic::ID IID;
|
||||
if (IsVariable && Name[17] != '.') {
|
||||
@ -1569,6 +1574,12 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||
IID = Intrinsic::x86_avx2_psrav_d;
|
||||
else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
|
||||
IID = Intrinsic::x86_avx2_psrav_d_256;
|
||||
else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
|
||||
IID = Intrinsic::x86_avx512_psrav_w_128;
|
||||
else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
|
||||
IID = Intrinsic::x86_avx512_psrav_w_256;
|
||||
else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
|
||||
IID = Intrinsic::x86_avx512_psrav_w_512;
|
||||
else
|
||||
llvm_unreachable("Unexpected size");
|
||||
} else if (Name.endswith(".128")) {
|
||||
|
@ -1057,15 +1057,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx512_mask_prorv_q_512, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pshuf_b_512, INTR_TYPE_2OP_MASK,
|
||||
X86ISD::PSHUFB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psllv16_hi, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psllv32hi, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psllv8_hi, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psrav16_hi, INTR_TYPE_2OP_MASK, X86ISD::VSRAV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psrav32_hi, INTR_TYPE_2OP_MASK, X86ISD::VSRAV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psrav8_hi, INTR_TYPE_2OP_MASK, X86ISD::VSRAV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psrlv16_hi, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psrlv32hi, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psrlv8_hi, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psubs_b_128, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psubs_b_256, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psubs_b_512, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
|
||||
@ -1493,6 +1484,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx512_pslli_w_512, VSHIFT, X86ISD::VSHLI, 0),
|
||||
X86_INTRINSIC_DATA(avx512_psllv_d_512, INTR_TYPE_2OP, ISD::SHL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_psllv_q_512, INTR_TYPE_2OP, ISD::SHL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_psllv_w_128, INTR_TYPE_2OP, ISD::SHL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_psllv_w_256, INTR_TYPE_2OP, ISD::SHL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_psllv_w_512, INTR_TYPE_2OP, ISD::SHL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_psra_d_512, INTR_TYPE_2OP, X86ISD::VSRA, 0),
|
||||
X86_INTRINSIC_DATA(avx512_psra_q_128, INTR_TYPE_2OP, X86ISD::VSRA, 0),
|
||||
X86_INTRINSIC_DATA(avx512_psra_q_256, INTR_TYPE_2OP, X86ISD::VSRA, 0),
|
||||
@ -1507,6 +1501,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx512_psrav_q_128, INTR_TYPE_2OP, X86ISD::VSRAV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_psrav_q_256, INTR_TYPE_2OP, X86ISD::VSRAV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_psrav_q_512, INTR_TYPE_2OP, X86ISD::VSRAV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_psrav_w_128, INTR_TYPE_2OP, X86ISD::VSRAV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_psrav_w_256, INTR_TYPE_2OP, X86ISD::VSRAV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_psrav_w_512, INTR_TYPE_2OP, X86ISD::VSRAV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_psrl_d_512, INTR_TYPE_2OP, X86ISD::VSRL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_psrl_q_512, INTR_TYPE_2OP, X86ISD::VSRL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_psrl_w_512, INTR_TYPE_2OP, X86ISD::VSRL, 0),
|
||||
@ -1515,6 +1512,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx512_psrli_w_512, VSHIFT, X86ISD::VSRLI, 0),
|
||||
X86_INTRINSIC_DATA(avx512_psrlv_d_512, INTR_TYPE_2OP, ISD::SRL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_psrlv_q_512, INTR_TYPE_2OP, ISD::SRL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, ISD::SRL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, ISD::SRL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, ISD::SRL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_ptestm_b_128, CMP_MASK, X86ISD::TESTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_ptestm_b_256, CMP_MASK, X86ISD::TESTM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_ptestm_b_512, CMP_MASK, X86ISD::TESTM, 0),
|
||||
|
@ -2288,22 +2288,22 @@ declare <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16>, <32 x i16>, <32 x
|
||||
define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
|
||||
; AVX512BW-LABEL: test_int_x86_avx512_mask_psrlv32hi:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3
|
||||
; AVX512BW-NEXT: kmovd %edi, %k1
|
||||
; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3 {%k1} {z}
|
||||
; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
|
||||
; AVX512BW-NEXT: vpaddw %zmm3, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrlv32hi:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3 {%k1} {z}
|
||||
; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
||||
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
|
||||
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm0, %zmm0
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
|
||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
|
||||
@ -2318,22 +2318,22 @@ declare <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16>, <32 x i16>, <32
|
||||
define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
|
||||
; AVX512BW-LABEL: test_int_x86_avx512_mask_psrav32_hi:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm3
|
||||
; AVX512BW-NEXT: kmovd %edi, %k1
|
||||
; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm3 {%k1} {z}
|
||||
; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
|
||||
; AVX512BW-NEXT: vpaddw %zmm3, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrav32_hi:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm3
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm3 {%k1} {z}
|
||||
; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
||||
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
|
||||
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm0, %zmm0
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
|
||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
|
||||
@ -2366,22 +2366,22 @@ declare <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16>, <32 x i16>, <32 x
|
||||
define <32 x i16>@test_int_x86_avx512_mask_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
|
||||
; AVX512BW-LABEL: test_int_x86_avx512_mask_psllv32hi:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm3
|
||||
; AVX512BW-NEXT: kmovd %edi, %k1
|
||||
; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm3 {%k1} {z}
|
||||
; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
|
||||
; AVX512BW-NEXT: vpaddw %zmm3, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psllv32hi:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm3
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm3 {%k1} {z}
|
||||
; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
||||
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
|
||||
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm0, %zmm0
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
|
||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
|
||||
|
@ -4440,12 +4440,12 @@ declare <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16>, <16 x i16>, <16
|
||||
define <16 x i16>@test_int_x86_avx512_mask_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_psrlv16_hi:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsrlvw %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x10,0xd9]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpsrlvw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x10,0xd1]
|
||||
; CHECK-NEXT: vpsrlvw %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x10,0xd9]
|
||||
; CHECK-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x10,0xc1]
|
||||
; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xcb]
|
||||
; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0]
|
||||
; CHECK-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x10,0xc1]
|
||||
; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0]
|
||||
; CHECK-NEXT: vpaddw %ymm3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
|
||||
%res1 = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
|
||||
@ -4460,12 +4460,12 @@ declare <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16>, <8 x i16>, <8 x i16
|
||||
define <8 x i16>@test_int_x86_avx512_mask_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_psrlv8_hi:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsrlvw %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x10,0xd9]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpsrlvw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x10,0xd1]
|
||||
; CHECK-NEXT: vpsrlvw %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x10,0xd9]
|
||||
; CHECK-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x10,0xc1]
|
||||
; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xcb]
|
||||
; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0]
|
||||
; CHECK-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x10,0xc1]
|
||||
; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0]
|
||||
; CHECK-NEXT: vpaddw %xmm3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
|
||||
%res1 = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
|
||||
@ -4480,12 +4480,12 @@ declare <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16>, <16 x i16>, <16
|
||||
define <16 x i16>@test_int_x86_avx512_mask_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_psrav16_hi:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsravw %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x11,0xd9]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpsravw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x11,0xd1]
|
||||
; CHECK-NEXT: vpsravw %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x11,0xd9]
|
||||
; CHECK-NEXT: vpsravw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x11,0xc1]
|
||||
; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xcb]
|
||||
; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0]
|
||||
; CHECK-NEXT: vpsravw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x11,0xc1]
|
||||
; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0]
|
||||
; CHECK-NEXT: vpaddw %ymm3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
|
||||
%res1 = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
|
||||
@ -4500,12 +4500,12 @@ declare <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16>, <8 x i16>, <8 x i16
|
||||
define <8 x i16>@test_int_x86_avx512_mask_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_psrav8_hi:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsravw %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x11,0xd9]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpsravw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x11,0xd1]
|
||||
; CHECK-NEXT: vpsravw %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x11,0xd9]
|
||||
; CHECK-NEXT: vpsravw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x11,0xc1]
|
||||
; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xcb]
|
||||
; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0]
|
||||
; CHECK-NEXT: vpsravw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x11,0xc1]
|
||||
; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0]
|
||||
; CHECK-NEXT: vpaddw %xmm3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
|
||||
%res1 = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
|
||||
@ -4520,12 +4520,12 @@ declare <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16>, <16 x i16>, <16
|
||||
define <16 x i16>@test_int_x86_avx512_mask_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_psllv16_hi:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsllvw %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x12,0xd9]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpsllvw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x12,0xd1]
|
||||
; CHECK-NEXT: vpsllvw %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x12,0xd9]
|
||||
; CHECK-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x12,0xc1]
|
||||
; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xcb]
|
||||
; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0]
|
||||
; CHECK-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x12,0xc1]
|
||||
; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0]
|
||||
; CHECK-NEXT: vpaddw %ymm3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
|
||||
%res1 = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
|
||||
@ -4540,12 +4540,12 @@ declare <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16>, <8 x i16>, <8 x i16
|
||||
define <8 x i16>@test_int_x86_avx512_mask_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_psllv8_hi:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpsllvw %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x12,0xd9]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpsllvw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x12,0xd1]
|
||||
; CHECK-NEXT: vpsllvw %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x12,0xd9]
|
||||
; CHECK-NEXT: vpsllvw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x12,0xc1]
|
||||
; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xcb]
|
||||
; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0]
|
||||
; CHECK-NEXT: vpsllvw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x12,0xc1]
|
||||
; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0]
|
||||
; CHECK-NEXT: vpaddw %xmm3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
|
||||
%res1 = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
|
||||
|
Loading…
Reference in New Issue
Block a user