mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[X86] Remove masked vpermi2var/vpermt2var intrinsics and autoupgrade.
We have unmasked intrinsics now and wrap them with a select. This is a net reduction of 36 intrinsics from before the unmasked intrinsics were added.

llvm-svn: 333388
This commit is contained in:
parent
830987e346
commit
1e3f8dcb86
@ -1012,276 +1012,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
GCCBuiltin<"__builtin_ia32_vpermilvarps256">,
|
||||
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermi2var_d_128 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v4i32_ty],
|
||||
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermi2var_d_256 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v8i32_ty],
|
||||
[llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermi2var_d_512 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v16i32_ty],
|
||||
[llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermi2var_hi_128 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v8i16_ty],
|
||||
[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermi2var_hi_256 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v16i16_ty],
|
||||
[llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermi2var_hi_512 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v32i16_ty],
|
||||
[llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermi2var_pd_128 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermi2var_pd_256 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermi2var_pd_512 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermi2var_ps_128 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermi2var_ps_256 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8f32_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermi2var_ps_512 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermi2var_q_128 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v2i64_ty],
|
||||
[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermi2var_q_256 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v4i64_ty],
|
||||
[llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermi2var_q_512 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v8i64_ty],
|
||||
[llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermt2var_d_512: // FIXME: Remove
|
||||
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
|
||||
llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermt2var_q_512: // FIXME: Remove
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
|
||||
llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermt2var_ps_512: // FIXME: Remove
|
||||
Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty,
|
||||
llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermt2var_pd_512: // FIXME: Remove
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8i64_ty,
|
||||
llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermt2var_d_128 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v4i32_ty],
|
||||
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_maskz_vpermt2var_d_128 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v4i32_ty],
|
||||
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermt2var_d_256 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v8i32_ty],
|
||||
[llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_maskz_vpermt2var_d_256 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v8i32_ty],
|
||||
[llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_maskz_vpermt2var_d_512 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v16i32_ty],
|
||||
[llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermt2var_hi_128 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v8i16_ty],
|
||||
[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_maskz_vpermt2var_hi_128 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v8i16_ty],
|
||||
[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermt2var_hi_256 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v16i16_ty],
|
||||
[llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_maskz_vpermt2var_hi_256 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v16i16_ty],
|
||||
[llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermt2var_hi_512 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v32i16_ty],
|
||||
[llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_maskz_vpermt2var_hi_512 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v32i16_ty],
|
||||
[llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermt2var_pd_128 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2i64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_maskz_vpermt2var_pd_128 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2i64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermt2var_pd_256 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4i64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_maskz_vpermt2var_pd_256 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4i64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_maskz_vpermt2var_pd_512 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8i64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermt2var_ps_128 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4i32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_maskz_vpermt2var_ps_128 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4i32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermt2var_ps_256 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8i32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_maskz_vpermt2var_ps_256 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8i32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_maskz_vpermt2var_ps_512 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v16f32_ty],
|
||||
[llvm_v16i32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermt2var_q_128 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v2i64_ty],
|
||||
[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_maskz_vpermt2var_q_128 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v2i64_ty],
|
||||
[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermt2var_q_256 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v4i64_ty],
|
||||
[llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_maskz_vpermt2var_q_256 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v4i64_ty],
|
||||
[llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_maskz_vpermt2var_q_512 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v8i64_ty],
|
||||
[llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermi2var_qi_128 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
|
||||
llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermt2var_qi_128 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
|
||||
llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_maskz_vpermt2var_qi_128 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
|
||||
llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermi2var_qi_256 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
|
||||
llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermt2var_qi_256 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
|
||||
llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_maskz_vpermt2var_qi_256 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
|
||||
llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermi2var_qi_512 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty,
|
||||
llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermt2var_qi_512 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty,
|
||||
llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_maskz_vpermt2var_qi_512 : // FIXME: Remove
|
||||
Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty,
|
||||
llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def int_x86_avx512_vpermi2var_d_128 :
|
||||
GCCBuiltin<"__builtin_ia32_vpermi2vard128">,
|
||||
Intrinsic<[llvm_v4i32_ty],
|
||||
|
@ -267,6 +267,9 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
|
||||
Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
|
||||
Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
|
||||
Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
|
||||
Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
|
||||
Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
|
||||
Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
|
||||
Name == "sse.cvtsi2ss" || // Added in 7.0
|
||||
Name == "sse.cvtsi642ss" || // Added in 7.0
|
||||
Name == "sse2.cvtsi2sd" || // Added in 7.0
|
||||
@ -2599,6 +2602,67 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||
Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
|
||||
: CI->getArgOperand(0);
|
||||
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
|
||||
} else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
|
||||
Name.startswith("avx512.mask.vpermt2var.") ||
|
||||
Name.startswith("avx512.maskz.vpermt2var."))) {
|
||||
// Upgrade the legacy masked vpermi2var/vpermt2var intrinsics: emit a call to
// the matching unmasked vpermi2var intrinsic and apply the mask with a select.
// Of the three prefixes matched above, only "avx512.maskz." has 'z' at
// index 11, so this flags the zero-masking variants.
bool ZeroMask = Name[11] == 'z';
|
||||
// Of the three prefixes matched above, only "avx512.mask.vpermi2var." has
// 'i' at index 17, so this flags the index (vpermi2var) form.
bool IndexForm = Name[17] == 'i';
|
||||
// The replacement intrinsic is chosen purely from the call's result type:
// total vector width, element width, and FP vs. integer elements.
unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
|
||||
unsigned EltWidth = CI->getType()->getScalarSizeInBits();
|
||||
bool IsFloat = CI->getType()->isFPOrFPVectorTy();
|
||||
Intrinsic::ID IID;
|
||||
if (VecWidth == 128 && EltWidth == 32 && IsFloat)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
|
||||
else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_d_128;
|
||||
else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
|
||||
else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_q_128;
|
||||
else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
|
||||
else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_d_256;
|
||||
else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
|
||||
else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_q_256;
|
||||
else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
|
||||
else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_d_512;
|
||||
else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
|
||||
else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_q_512;
|
||||
// The 16-bit (hi) and 8-bit (qi) element variants are selected without
// consulting IsFloat.
else if (VecWidth == 128 && EltWidth == 16)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
|
||||
else if (VecWidth == 256 && EltWidth == 16)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
|
||||
else if (VecWidth == 512 && EltWidth == 16)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
|
||||
else if (VecWidth == 128 && EltWidth == 8)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
|
||||
else if (VecWidth == 256 && EltWidth == 8)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
|
||||
else if (VecWidth == 512 && EltWidth == 8)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
|
||||
else
|
||||
llvm_unreachable("Unexpected intrinsic");
|
||||
|
||||
// Only the three vector operands are forwarded to the unmasked intrinsic;
// the mask (operand 3) is applied by the select emitted below.
Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
|
||||
CI->getArgOperand(2) };
|
||||
|
||||
// If this isn't index form we need to swap operand 0 and 1.
|
||||
if (!IndexForm)
|
||||
std::swap(Args[0], Args[1]);
|
||||
|
||||
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
|
||||
Args);
|
||||
// Pass-through for masked-off lanes: all zeros for the maskz variants,
// otherwise operand 1 of the original call bitcast to the result type.
Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
|
||||
: Builder.CreateBitCast(CI->getArgOperand(1),
|
||||
CI->getType());
|
||||
// Operand 3 of the original call is the mask.
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
|
||||
} else if (IsX86 && Name.startswith("avx512.mask.") &&
|
||||
upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
|
||||
// Rep will be updated by the call in the condition.
|
||||
|
@ -20515,44 +20515,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
||||
// Swap Src1 and Src2 in the node creation
|
||||
return DAG.getNode(IntrData->Opc0, dl, VT,Src2, Src1);
|
||||
}
|
||||
case VPERM_3OP_MASKZ:
|
||||
case VPERM_3OP_MASK:{
|
||||
// Src2 is the PassThru
|
||||
SDValue Src1 = Op.getOperand(1);
|
||||
SDValue Src2 = Op.getOperand(2);
|
||||
SDValue Src3 = Op.getOperand(3);
|
||||
SDValue Mask = Op.getOperand(4);
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
|
||||
// set PassThru element
|
||||
SDValue PassThru;
|
||||
if (IntrData->Type == VPERM_3OP_MASKZ)
|
||||
PassThru = getZeroVector(VT, Subtarget, DAG, dl);
|
||||
else
|
||||
PassThru = DAG.getBitcast(VT, Src2);
|
||||
|
||||
// Swap Src1 and Src2 in the node creation
|
||||
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
|
||||
dl, Op.getValueType(),
|
||||
Src2, Src1, Src3),
|
||||
Mask, PassThru, Subtarget, DAG);
|
||||
}
|
||||
case VPERMI_3OP_MASK:{
|
||||
// Src2 is the PassThru
|
||||
SDValue Src1 = Op.getOperand(1);
|
||||
SDValue Src2 = Op.getOperand(2);
|
||||
SDValue Src3 = Op.getOperand(3);
|
||||
SDValue Mask = Op.getOperand(4);
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
|
||||
// set PassThru element
|
||||
SDValue PassThru = DAG.getBitcast(VT, Src2);
|
||||
|
||||
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
|
||||
dl, Op.getValueType(),
|
||||
Src1, Src2, Src3),
|
||||
Mask, PassThru, Subtarget, DAG);
|
||||
}
|
||||
case FMA_OP_MASK3:
|
||||
case FMA_OP_MASKZ:
|
||||
case FMA_OP_MASK: {
|
||||
|
@ -30,8 +30,7 @@ enum IntrinsicType : uint16_t {
|
||||
INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_IMM8_MASK,
|
||||
FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3,
|
||||
FMA_OP_SCALAR_MASK, FMA_OP_SCALAR_MASKZ, FMA_OP_SCALAR_MASK3,
|
||||
IFMA_OP,
|
||||
VPERM_2OP, VPERMI_3OP_MASK, VPERM_3OP_MASK, VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK,
|
||||
IFMA_OP, VPERM_2OP, INTR_TYPE_SCALAR_MASK,
|
||||
INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK,
|
||||
COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
|
||||
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
|
||||
@ -1061,79 +1060,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_256, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_512, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0),
|
||||
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERMI_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_256, VPERMI_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_512, VPERMI_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_128, VPERMI_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_256, VPERMI_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_512, VPERMI_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_128, VPERMI_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_256, VPERMI_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_512, VPERMI_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_128, VPERMI_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_256, VPERMI_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_512, VPERMI_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_128, VPERMI_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_256, VPERMI_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_512, VPERMI_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_128, VPERMI_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_256, VPERMI_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_512, VPERMI_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_256, VPERM_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_512, VPERM_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_128, VPERM_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_256, VPERM_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_512, VPERM_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_128, VPERM_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_256, VPERM_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_512, VPERM_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_128, VPERM_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_256, VPERM_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_512, VPERM_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_128, VPERM_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_256, VPERM_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_512, VPERM_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_qi_128, VPERM_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_qi_256, VPERM_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_qi_512, VPERM_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpshld_d_128, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHLD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpshld_d_256, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHLD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpshld_d_512, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHLD, 0),
|
||||
@ -1277,43 +1203,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vpdpwssds_256, FMA_OP_MASKZ, X86ISD::VPDPWSSDS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vpdpwssds_512, FMA_OP_MASKZ, X86ISD::VPDPWSSDS, 0),
|
||||
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_128, VPERM_3OP_MASKZ,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_256, VPERM_3OP_MASKZ,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_512, VPERM_3OP_MASKZ,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_hi_128, VPERM_3OP_MASKZ,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_hi_256, VPERM_3OP_MASKZ,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_hi_512, VPERM_3OP_MASKZ,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_pd_128, VPERM_3OP_MASKZ,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_pd_256, VPERM_3OP_MASKZ,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_pd_512, VPERM_3OP_MASKZ,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_ps_128, VPERM_3OP_MASKZ,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_ps_256, VPERM_3OP_MASKZ,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_ps_512, VPERM_3OP_MASKZ,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_128, VPERM_3OP_MASKZ,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_256, VPERM_3OP_MASKZ,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_512, VPERM_3OP_MASKZ,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_qi_128, VPERM_3OP_MASKZ,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_qi_256, VPERM_3OP_MASKZ,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_qi_512, VPERM_3OP_MASKZ,
|
||||
X86ISD::VPERMV3, 0),
|
||||
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_128, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_256, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_512, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
|
||||
|
@ -4299,3 +4299,162 @@ define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i6
|
||||
%res2 = add <8 x i64> %res, %res1
|
||||
ret <8 x i64> %res2
|
||||
}
|
||||
|
||||
declare <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
|
||||
|
||||
; Calls the legacy masked vpermi2var.d.512 intrinsic twice and adds the results:
; once with the run-time mask %x3 and a third operand loaded from %x2p, and once
; with an all-ones mask and %x4 as the third operand.
define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_d_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
|
||||
; CHECK-NEXT: vpermi2d (%rdi), %zmm0, %zmm3 {%k1}
|
||||
; CHECK-NEXT: vpermt2d %zmm2, %zmm1, %zmm0
|
||||
; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%x2 = load <16 x i32>, <16 x i32>* %x2p
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
|
||||
%res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1)
|
||||
%res2 = add <16 x i32> %res, %res1
|
||||
ret <16 x i32> %res2
|
||||
}
|
||||
|
||||
declare <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
|
||||
|
||||
; Calls the legacy masked vpermi2var.pd.512 intrinsic twice with the same vector
; operands, once with the run-time mask %x3 and once with an all-ones mask, and
; adds the two results.
define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovapd %zmm0, %zmm3
|
||||
; CHECK-NEXT: vpermt2pd %zmm2, %zmm1, %zmm3
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vaddpd %zmm3, %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
|
||||
%res1 = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
|
||||
%res2 = fadd <8 x double> %res, %res1
|
||||
ret <8 x double> %res2
|
||||
}
|
||||
|
||||
declare <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
|
||||
|
||||
; Calls the legacy masked vpermi2var.ps.512 intrinsic twice with the same vector
; operands, once with the run-time mask %x3 and once with an all-ones mask, and
; adds the two results.
define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovaps %zmm0, %zmm3
|
||||
; CHECK-NEXT: vpermt2ps %zmm2, %zmm1, %zmm3
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vaddps %zmm3, %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
|
||||
%res1 = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
|
||||
%res2 = fadd <16 x float> %res, %res1
|
||||
ret <16 x float> %res2
|
||||
}
|
||||
|
||||
declare <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
|
||||
|
||||
; Calls the legacy masked vpermi2var.q.512 intrinsic twice with the same vector
; operands, once with the run-time mask %x3 and once with an all-ones mask, and
; adds the two results.
define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_q_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
|
||||
; CHECK-NEXT: vpermt2q %zmm2, %zmm1, %zmm3
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vpaddq %zmm3, %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
|
||||
%res1 = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
|
||||
%res2 = add <8 x i64> %res, %res1
|
||||
ret <8 x i64> %res2
|
||||
}
|
||||
|
||||
declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
|
||||
|
||||
; Calls the legacy zero-masking vpermt2var.d.512 intrinsic twice and adds the
; results: once with the run-time mask %x3 and a third operand loaded from %x2p,
; and once with an all-ones mask and %x1 passed as both the second and third
; operand.
define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm2
|
||||
; CHECK-NEXT: vpermt2d (%rdi), %zmm0, %zmm2 {%k1} {z}
|
||||
; CHECK-NEXT: vpermt2d %zmm1, %zmm0, %zmm1
|
||||
; CHECK-NEXT: vpaddd %zmm1, %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%x2 = load <16 x i32>, <16 x i32>* %x2p
|
||||
%res = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
|
||||
%res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x1, i16 -1)
|
||||
%res2 = add <16 x i32> %res, %res1
|
||||
ret <16 x i32> %res2
|
||||
}
|
||||
|
||||
declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8)
|
||||
|
||||
; Calls the legacy zero-masking vpermt2var.pd.512 intrinsic twice and adds the
; results: once with the run-time mask %x3 and a third operand built by splatting
; a scalar double loaded from %x2ptr, and once with an all-ones mask and %x1
; passed as both the second and third operand.
define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, double* %x2ptr, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_pd_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vmovapd %zmm1, %zmm2
|
||||
; CHECK-NEXT: vpermt2pd (%rdi){1to8}, %zmm0, %zmm2 {%k1} {z}
|
||||
; CHECK-NEXT: vpermt2pd %zmm1, %zmm0, %zmm1
|
||||
; CHECK-NEXT: vaddpd %zmm1, %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%x2s = load double, double* %x2ptr
|
||||
%x2ins = insertelement <8 x double> undef, double %x2s, i32 0
|
||||
%x2 = shufflevector <8 x double> %x2ins, <8 x double> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
|
||||
%res1 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x1, i8 -1)
|
||||
%res2 = fadd <8 x double> %res, %res1
|
||||
ret <8 x double> %res2
|
||||
}
|
||||
|
||||
declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)
|
||||
|
||||
; Calls the legacy zero-masking vpermt2var.ps.512 intrinsic twice with the same
; vector operands, once with the run-time mask %x3 and once with an all-ones
; mask, and adds the two results.
define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_ps_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovaps %zmm1, %zmm3
|
||||
; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm3
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm1 {%k1} {z}
|
||||
; CHECK-NEXT: vaddps %zmm3, %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
|
||||
%res1 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
|
||||
%res2 = fadd <16 x float> %res, %res1
|
||||
ret <16 x float> %res2
|
||||
}
|
||||
|
||||
|
||||
declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
|
||||
|
||||
; Calls the legacy zero-masking vpermt2var.q.512 intrinsic twice with the same
; vector operands, once with the run-time mask %x3 and once with an all-ones
; mask, and adds the two results.
define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
|
||||
; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm3
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm1 {%k1} {z}
|
||||
; CHECK-NEXT: vpaddq %zmm3, %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
|
||||
%res1 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
|
||||
%res2 = add <8 x i64> %res, %res1
|
||||
ret <8 x i64> %res2
|
||||
}
|
||||
|
||||
declare <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
|
||||
|
||||
; Calls the legacy masked vpermt2var.d.512 intrinsic twice with the same vector
; operands, once with the run-time mask %x3 and once with an all-ones mask, and
; adds the two results.
define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
|
||||
; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm3
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vpaddd %zmm3, %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
|
||||
%res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
|
||||
%res2 = add <16 x i32> %res, %res1
|
||||
ret <16 x i32> %res2
|
||||
}
|
||||
|
@ -1955,7 +1955,7 @@ define <2 x double> @test_x86_avx512__mm_cvt_roundu64_sd(<2 x double> %a, i64 %b
|
||||
}
|
||||
declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64, i32) nounwind readnone
|
||||
|
||||
declare <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
|
||||
declare <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>)
|
||||
|
||||
define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_d_512:
|
||||
@ -1967,65 +1967,75 @@ define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16
|
||||
; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%x2 = load <16 x i32>, <16 x i32>* %x2p
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
|
||||
%res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1)
|
||||
%res2 = add <16 x i32> %res, %res1
|
||||
%1 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
|
||||
%2 = bitcast i16 %x3 to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1
|
||||
%4 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4)
|
||||
%res2 = add <16 x i32> %3, %4
|
||||
ret <16 x i32> %res2
|
||||
}
|
||||
|
||||
declare <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
|
||||
declare <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>)
|
||||
|
||||
define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vmovapd %zmm0, %zmm3
|
||||
; CHECK-NEXT: vpermt2pd %zmm2, %zmm1, %zmm3
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vaddpd %zmm3, %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
|
||||
%res1 = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
|
||||
%res2 = fadd <8 x double> %res, %res1
|
||||
%1 = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2)
|
||||
%2 = bitcast <8 x i64> %x1 to <8 x double>
|
||||
%3 = bitcast i8 %x3 to <8 x i1>
|
||||
%4 = select <8 x i1> %3, <8 x double> %1, <8 x double> %2
|
||||
%5 = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2)
|
||||
%6 = bitcast <8 x i64> %x1 to <8 x double>
|
||||
%res2 = fadd <8 x double> %4, %5
|
||||
ret <8 x double> %res2
|
||||
}
|
||||
|
||||
declare <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
|
||||
declare <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>)
|
||||
|
||||
define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vmovaps %zmm0, %zmm3
|
||||
; CHECK-NEXT: vpermt2ps %zmm2, %zmm1, %zmm3
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vaddps %zmm3, %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
|
||||
%res1 = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
|
||||
%res2 = fadd <16 x float> %res, %res1
|
||||
%1 = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2)
|
||||
%2 = bitcast <16 x i32> %x1 to <16 x float>
|
||||
%3 = bitcast i16 %x3 to <16 x i1>
|
||||
%4 = select <16 x i1> %3, <16 x float> %1, <16 x float> %2
|
||||
%5 = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2)
|
||||
%6 = bitcast <16 x i32> %x1 to <16 x float>
|
||||
%res2 = fadd <16 x float> %4, %5
|
||||
ret <16 x float> %res2
|
||||
}
|
||||
|
||||
declare <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
|
||||
declare <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>)
|
||||
|
||||
define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_q_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
|
||||
; CHECK-NEXT: vpermt2q %zmm2, %zmm1, %zmm3
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vpaddq %zmm3, %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
|
||||
%res1 = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
|
||||
%res2 = add <8 x i64> %res, %res1
|
||||
%1 = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
|
||||
%2 = bitcast i8 %x3 to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x1
|
||||
%4 = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
|
||||
%res2 = add <8 x i64> %3, %4
|
||||
ret <8 x i64> %res2
|
||||
}
|
||||
|
||||
declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
|
||||
|
||||
define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_512:
|
||||
; CHECK: ## %bb.0:
|
||||
@ -2036,14 +2046,14 @@ define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16
|
||||
; CHECK-NEXT: vpaddd %zmm1, %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%x2 = load <16 x i32>, <16 x i32>* %x2p
|
||||
%res = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
|
||||
%res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x1, i16 -1)
|
||||
%res2 = add <16 x i32> %res, %res1
|
||||
%1 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2)
|
||||
%2 = bitcast i16 %x3 to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
|
||||
%4 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x1)
|
||||
%res2 = add <16 x i32> %3, %4
|
||||
ret <16 x i32> %res2
|
||||
}
|
||||
|
||||
declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8)
|
||||
|
||||
define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, double* %x2ptr, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_pd_512:
|
||||
; CHECK: ## %bb.0:
|
||||
@ -2056,61 +2066,62 @@ define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <
|
||||
%x2s = load double, double* %x2ptr
|
||||
%x2ins = insertelement <8 x double> undef, double %x2s, i32 0
|
||||
%x2 = shufflevector <8 x double> %x2ins, <8 x double> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
|
||||
%res1 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x1, i8 -1)
|
||||
%res2 = fadd <8 x double> %res, %res1
|
||||
%1 = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> %x1, <8 x i64> %x0, <8 x double> %x2)
|
||||
%2 = bitcast i8 %x3 to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
|
||||
%4 = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> %x1, <8 x i64> %x0, <8 x double> %x1)
|
||||
%res2 = fadd <8 x double> %3, %4
|
||||
ret <8 x double> %res2
|
||||
}
|
||||
|
||||
declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)
|
||||
|
||||
define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_ps_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vmovaps %zmm1, %zmm3
|
||||
; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm3
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm1 {%k1} {z}
|
||||
; CHECK-NEXT: vaddps %zmm3, %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
|
||||
%res1 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
|
||||
%res2 = fadd <16 x float> %res, %res1
|
||||
%1 = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %x1, <16 x i32> %x0, <16 x float> %x2)
|
||||
%2 = bitcast i16 %x3 to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
|
||||
%4 = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %x1, <16 x i32> %x0, <16 x float> %x2)
|
||||
%res2 = fadd <16 x float> %3, %4
|
||||
ret <16 x float> %res2
|
||||
}
|
||||
|
||||
|
||||
declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
|
||||
|
||||
define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
|
||||
; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm3
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm1 {%k1} {z}
|
||||
; CHECK-NEXT: vpaddq %zmm3, %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
|
||||
%res1 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
|
||||
%res2 = add <8 x i64> %res, %res1
|
||||
%1 = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> %x2)
|
||||
%2 = bitcast i8 %x3 to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
|
||||
%4 = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> %x2)
|
||||
%res2 = add <8 x i64> %3, %4
|
||||
ret <8 x i64> %res2
|
||||
}
|
||||
|
||||
declare <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
|
||||
|
||||
define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
|
||||
; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm3
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vpaddd %zmm3, %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
|
||||
%res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
|
||||
%res2 = add <16 x i32> %res, %res1
|
||||
%1 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2)
|
||||
%2 = bitcast i16 %x3 to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1
|
||||
%4 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2)
|
||||
%res2 = add <16 x i32> %3, %4
|
||||
ret <16 x i32> %res2
|
||||
}
|
||||
|
||||
|
@ -2787,3 +2787,81 @@ define <32 x i16>@test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x
|
||||
%res4 = add <32 x i16> %res3, %res2
|
||||
ret <32 x i16> %res4
|
||||
}
|
||||
|
||||
declare <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
||||
|
||||
define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
|
||||
; AVX512BW-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
|
||||
; AVX512BW: ## %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm3
|
||||
; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm3
|
||||
; AVX512BW-NEXT: kmovd %edi, %k1
|
||||
; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1}
|
||||
; AVX512BW-NEXT: vpaddw %zmm3, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
|
||||
; AVX512F-32: # %bb.0:
|
||||
; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm3
|
||||
; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm3
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1}
|
||||
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm1, %zmm0
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
|
||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
|
||||
%res2 = add <32 x i16> %res, %res1
|
||||
ret <32 x i16> %res2
|
||||
}
|
||||
|
||||
declare <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
||||
|
||||
define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
|
||||
; AVX512BW-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
|
||||
; AVX512BW: ## %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm3
|
||||
; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm3
|
||||
; AVX512BW-NEXT: kmovd %edi, %k1
|
||||
; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1} {z}
|
||||
; AVX512BW-NEXT: vpaddw %zmm3, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
|
||||
; AVX512F-32: # %bb.0:
|
||||
; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm3
|
||||
; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm3
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1} {z}
|
||||
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm1, %zmm0
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
|
||||
%res1 = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
|
||||
%res2 = add <32 x i16> %res, %res1
|
||||
ret <32 x i16> %res2
|
||||
}
|
||||
|
||||
declare <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
||||
|
||||
define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
|
||||
; AVX512BW-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
|
||||
; AVX512BW: ## %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm3
|
||||
; AVX512BW-NEXT: vpermt2w %zmm2, %zmm1, %zmm3
|
||||
; AVX512BW-NEXT: kmovd %edi, %k1
|
||||
; AVX512BW-NEXT: vpermi2w %zmm2, %zmm0, %zmm1 {%k1}
|
||||
; AVX512BW-NEXT: vpaddw %zmm3, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
|
||||
; AVX512F-32: # %bb.0:
|
||||
; AVX512F-32-NEXT: vmovdqa64 %zmm0, %zmm3
|
||||
; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm1, %zmm3
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vpermi2w %zmm2, %zmm0, %zmm1 {%k1}
|
||||
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm1, %zmm0
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
|
||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
|
||||
%res2 = add <32 x i16> %res, %res1
|
||||
ret <32 x i16> %res2
|
||||
}
|
||||
|
@ -995,81 +995,83 @@ define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr
|
||||
|
||||
declare <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
||||
|
||||
declare <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
||||
|
||||
define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
|
||||
; AVX512BW-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
|
||||
; AVX512BW: ## %bb.0:
|
||||
; AVX512BW-NEXT: kmovd %edi, %k1
|
||||
; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm3
|
||||
; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1}
|
||||
; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
|
||||
; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0
|
||||
; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm3
|
||||
; AVX512BW-NEXT: kmovd %edi, %k1
|
||||
; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1}
|
||||
; AVX512BW-NEXT: vpaddw %zmm3, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
|
||||
; AVX512F-32: # %bb.0:
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm3
|
||||
; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1}
|
||||
; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
|
||||
; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0
|
||||
; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm3
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1}
|
||||
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm1, %zmm0
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
|
||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
|
||||
%res2 = add <32 x i16> %res, %res1
|
||||
%1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
|
||||
%2 = bitcast i32 %x3 to <32 x i1>
|
||||
%3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x1
|
||||
%4 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
|
||||
%res2 = add <32 x i16> %3, %4
|
||||
ret <32 x i16> %res2
|
||||
}
|
||||
|
||||
declare <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
||||
|
||||
define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
|
||||
; AVX512BW-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
|
||||
; AVX512BW: ## %bb.0:
|
||||
; AVX512BW-NEXT: kmovd %edi, %k1
|
||||
; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm3
|
||||
; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1} {z}
|
||||
; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
|
||||
; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0
|
||||
; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm3
|
||||
; AVX512BW-NEXT: kmovd %edi, %k1
|
||||
; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1} {z}
|
||||
; AVX512BW-NEXT: vpaddw %zmm3, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
|
||||
; AVX512F-32: # %bb.0:
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm3
|
||||
; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1} {z}
|
||||
; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
|
||||
; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0
|
||||
; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm3
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1} {z}
|
||||
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm1, %zmm0
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
|
||||
%res1 = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
|
||||
%res2 = add <32 x i16> %res, %res1
|
||||
%1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
|
||||
%2 = bitcast i32 %x3 to <32 x i1>
|
||||
%3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
|
||||
%4 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
|
||||
%res2 = add <32 x i16> %3, %4
|
||||
ret <32 x i16> %res2
|
||||
}
|
||||
|
||||
declare <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
||||
declare <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>)
|
||||
|
||||
define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
|
||||
; AVX512BW-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
|
||||
; AVX512BW: ## %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm3
|
||||
; AVX512BW-NEXT: vpermt2w %zmm2, %zmm1, %zmm3
|
||||
; AVX512BW-NEXT: kmovd %edi, %k1
|
||||
; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm3
|
||||
; AVX512BW-NEXT: vpermi2w %zmm2, %zmm0, %zmm3 {%k1}
|
||||
; AVX512BW-NEXT: vpermt2w %zmm2, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm3, %zmm0
|
||||
; AVX512BW-NEXT: vpermi2w %zmm2, %zmm0, %zmm1 {%k1}
|
||||
; AVX512BW-NEXT: vpaddw %zmm3, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
|
||||
; AVX512F-32: # %bb.0:
|
||||
; AVX512F-32-NEXT: vmovdqa64 %zmm0, %zmm3
|
||||
; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm1, %zmm3
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm3
|
||||
; AVX512F-32-NEXT: vpermi2w %zmm2, %zmm0, %zmm3 {%k1}
|
||||
; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm1, %zmm0
|
||||
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm3, %zmm0
|
||||
; AVX512F-32-NEXT: vpermi2w %zmm2, %zmm0, %zmm1 {%k1}
|
||||
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm1, %zmm0
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
|
||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
|
||||
%res2 = add <32 x i16> %res, %res1
|
||||
%1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2)
|
||||
%2 = bitcast i32 %x3 to <32 x i1>
|
||||
%3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x1
|
||||
%4 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2)
|
||||
%res2 = add <32 x i16> %3, %4
|
||||
ret <32 x i16> %res2
|
||||
}
|
||||
|
||||
|
@ -4050,3 +4050,105 @@ define <16 x i16>@test_int_x86_avx512_mask_permvar_hi_256(<16 x i16> %x0, <16 x
|
||||
%res4 = add <16 x i16> %res3, %res2
|
||||
ret <16 x i16> %res4
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
|
||||
|
||||
define <8 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
|
||||
; CHECK-NEXT: vpermt2w %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x7d,0xda]
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xca]
|
||||
; CHECK-NEXT: vpaddw %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
|
||||
%res1 = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
|
||||
%res2 = add <8 x i16> %res, %res1
|
||||
ret <8 x i16> %res2
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
|
||||
|
||||
define <8 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
|
||||
; CHECK-NEXT: vpermt2w %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x7d,0xda]
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x7d,0xca]
|
||||
; CHECK-NEXT: vpaddw %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
|
||||
%res1 = call <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
|
||||
%res2 = add <8 x i16> %res, %res1
|
||||
ret <8 x i16> %res2
|
||||
}
|
||||
|
||||
declare <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
|
||||
|
||||
define <16 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
|
||||
; CHECK-NEXT: vpermt2w %ymm2, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x7d,0xda]
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xca]
|
||||
; CHECK-NEXT: vpaddw %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
|
||||
%res1 = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
|
||||
%res2 = add <16 x i16> %res, %res1
|
||||
ret <16 x i16> %res2
|
||||
}
|
||||
|
||||
declare <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
|
||||
|
||||
define <16 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
|
||||
; CHECK-NEXT: vpermt2w %ymm2, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x7d,0xda]
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x7d,0xca]
|
||||
; CHECK-NEXT: vpaddw %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
|
||||
%res1 = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
|
||||
%res2 = add <16 x i16> %res, %res1
|
||||
ret <16 x i16> %res2
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
|
||||
|
||||
define <8 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
|
||||
; CHECK-NEXT: vpermt2w %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf2,0xf5,0x08,0x7d,0xda]
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermi2w %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x75,0xca]
|
||||
; CHECK-NEXT: vpaddw %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
|
||||
%res1 = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
|
||||
%res2 = add <8 x i16> %res, %res1
|
||||
ret <8 x i16> %res2
|
||||
}
|
||||
|
||||
declare <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
|
||||
|
||||
define <16 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
|
||||
; CHECK-NEXT: vpermt2w %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0xf5,0x28,0x7d,0xda]
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermi2w %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x75,0xca]
|
||||
; CHECK-NEXT: vpaddw %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
|
||||
%res1 = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
|
||||
%res2 = add <16 x i16> %res, %res1
|
||||
ret <16 x i16> %res2
|
||||
}
|
||||
|
@ -1798,105 +1798,109 @@ define <32 x i8> @test_mask_subs_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b,
|
||||
|
||||
declare <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
|
||||
|
||||
declare <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
|
||||
|
||||
define <8 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vmovdqa %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
|
||||
; CHECK-NEXT: vpermt2w %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x7d,0xda]
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xca]
|
||||
; CHECK-NEXT: vpaddw %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
|
||||
%res1 = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
|
||||
%res2 = add <8 x i16> %res, %res1
|
||||
%1 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x2)
|
||||
%2 = bitcast i8 %x3 to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x1
|
||||
%4 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x2)
|
||||
%res2 = add <8 x i16> %3, %4
|
||||
ret <8 x i16> %res2
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
|
||||
|
||||
define <8 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vmovdqa %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
|
||||
; CHECK-NEXT: vpermt2w %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x7d,0xda]
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x7d,0xca]
|
||||
; CHECK-NEXT: vpaddw %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
|
||||
%res1 = call <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
|
||||
%res2 = add <8 x i16> %res, %res1
|
||||
%1 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x2)
|
||||
%2 = bitcast i8 %x3 to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
|
||||
%4 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x2)
|
||||
%res2 = add <8 x i16> %3, %4
|
||||
ret <8 x i16> %res2
|
||||
}
|
||||
|
||||
declare <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
|
||||
|
||||
define <16 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vmovdqa %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
|
||||
; CHECK-NEXT: vpermt2w %ymm2, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x7d,0xda]
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xca]
|
||||
; CHECK-NEXT: vpaddw %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
|
||||
%res1 = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
|
||||
%res2 = add <16 x i16> %res, %res1
|
||||
%1 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x2)
|
||||
%2 = bitcast i16 %x3 to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x1
|
||||
%4 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x2)
|
||||
%res2 = add <16 x i16> %3, %4
|
||||
ret <16 x i16> %res2
|
||||
}
|
||||
|
||||
declare <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
|
||||
|
||||
define <16 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vmovdqa %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
|
||||
; CHECK-NEXT: vpermt2w %ymm2, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x7d,0xda]
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x7d,0xca]
|
||||
; CHECK-NEXT: vpaddw %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
|
||||
%res1 = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
|
||||
%res2 = add <16 x i16> %res, %res1
|
||||
%1 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x2)
|
||||
%2 = bitcast i16 %x3 to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
|
||||
%4 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x2)
|
||||
%res2 = add <16 x i16> %3, %4
|
||||
ret <16 x i16> %res2
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
|
||||
declare <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>)
|
||||
|
||||
define <8 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
|
||||
; CHECK-NEXT: vpermt2w %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf2,0xf5,0x08,0x7d,0xda]
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermi2w %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x75,0xca]
|
||||
; CHECK-NEXT: vpaddw %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
|
||||
%res1 = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
|
||||
%res2 = add <8 x i16> %res, %res1
|
||||
%1 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2)
|
||||
%2 = bitcast i8 %x3 to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x1
|
||||
%4 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2)
|
||||
%res2 = add <8 x i16> %3, %4
|
||||
ret <8 x i16> %res2
|
||||
}
|
||||
|
||||
declare <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
|
||||
declare <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>)
|
||||
|
||||
define <16 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
|
||||
; CHECK-NEXT: vpermt2w %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0xf5,0x28,0x7d,0xda]
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermi2w %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x75,0xca]
|
||||
; CHECK-NEXT: vpaddw %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
|
||||
%res1 = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
|
||||
%res2 = add <16 x i16> %res, %res1
|
||||
%1 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2)
|
||||
%2 = bitcast i16 %x3 to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x1
|
||||
%4 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2)
|
||||
%res2 = add <16 x i16> %3, %4
|
||||
ret <16 x i16> %res2
|
||||
}
|
||||
|
||||
|
@ -19,3 +19,59 @@ define <64 x i8>@test_int_x86_avx512_mask_permvar_qi_512(<64 x i8> %x0, <64 x i8
|
||||
%res4 = add <64 x i8> %res3, %res2
|
||||
ret <64 x i8> %res4
|
||||
}
|
||||
|
||||
declare <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
|
||||
|
||||
define <64 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
|
||||
; CHECK-NEXT: vpermt2b %zmm2, %zmm1, %zmm3
|
||||
; CHECK-NEXT: kmovq %rdi, %k1
|
||||
; CHECK-NEXT: vpermi2b %zmm2, %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; CHECK-NEXT: vpermi2b %zmm2, %zmm0, %zmm4 {%k1} {z}
|
||||
; CHECK-NEXT: vpaddb %zmm3, %zmm4, %zmm0
|
||||
; CHECK-NEXT: vpaddb %zmm0, %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
|
||||
%res1 = call <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> zeroinitializer, <64 x i8> %x2, i64 %x3)
|
||||
%res2 = call <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
|
||||
%res3 = add <64 x i8> %res, %res1
|
||||
%res4 = add <64 x i8> %res3, %res2
|
||||
ret <64 x i8> %res4
|
||||
}
|
||||
|
||||
declare <64 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
|
||||
|
||||
define <64 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
|
||||
; CHECK-NEXT: vpermt2b %zmm2, %zmm0, %zmm3
|
||||
; CHECK-NEXT: kmovq %rdi, %k1
|
||||
; CHECK-NEXT: vpermt2b %zmm2, %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; CHECK-NEXT: vpermt2b %zmm2, %zmm0, %zmm4 {%k1} {z}
|
||||
; CHECK-NEXT: vpaddb %zmm3, %zmm4, %zmm0
|
||||
; CHECK-NEXT: vpaddb %zmm0, %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <64 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
|
||||
%res1 = call <64 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.512(<64 x i8> %x0, <64 x i8> zeroinitializer, <64 x i8> %x2, i64 %x3)
|
||||
%res2 = call <64 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
|
||||
%res3 = add <64 x i8> %res, %res1
|
||||
%res4 = add <64 x i8> %res3, %res2
|
||||
ret <64 x i8> %res4
|
||||
}
|
||||
|
||||
declare <64 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
|
||||
|
||||
define <64 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovq %rdi, %k1
|
||||
; CHECK-NEXT: vpermi2b %zmm2, %zmm1, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <64 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
|
||||
ret <64 x i8> %res
|
||||
}
|
||||
|
@ -45,58 +45,64 @@ define <64 x i8>@test_int_x86_avx512_mask_pmultishift_qb_512(<64 x i8> %x0, <64
|
||||
ret <64 x i8> %res4
|
||||
}
|
||||
|
||||
declare <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
|
||||
declare <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8>, <64 x i8>, <64 x i8>)
|
||||
|
||||
define <64 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovq %rdi, %k1
|
||||
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
|
||||
; CHECK-NEXT: vpermt2b %zmm2, %zmm1, %zmm3
|
||||
; CHECK-NEXT: kmovq %rdi, %k1
|
||||
; CHECK-NEXT: vpermi2b %zmm2, %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; CHECK-NEXT: vpermi2b %zmm2, %zmm0, %zmm4 {%k1} {z}
|
||||
; CHECK-NEXT: vpaddb %zmm3, %zmm4, %zmm0
|
||||
; CHECK-NEXT: vpaddb %zmm0, %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
|
||||
%res1 = call <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> zeroinitializer, <64 x i8> %x2, i64 %x3)
|
||||
%res2 = call <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
|
||||
%res3 = add <64 x i8> %res, %res1
|
||||
%res4 = add <64 x i8> %res3, %res2
|
||||
%1 = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2)
|
||||
%2 = bitcast i64 %x3 to <64 x i1>
|
||||
%3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %x1
|
||||
%4 = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> zeroinitializer, <64 x i8> %x2)
|
||||
%5 = bitcast i64 %x3 to <64 x i1>
|
||||
%6 = select <64 x i1> %5, <64 x i8> %4, <64 x i8> zeroinitializer
|
||||
%7 = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2)
|
||||
%res3 = add <64 x i8> %3, %6
|
||||
%res4 = add <64 x i8> %res3, %7
|
||||
ret <64 x i8> %res4
|
||||
}
|
||||
|
||||
declare <64 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
|
||||
|
||||
define <64 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovq %rdi, %k1
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
|
||||
; CHECK-NEXT: vpermt2b %zmm2, %zmm0, %zmm3 {%k1}
|
||||
; CHECK-NEXT: vpermt2b %zmm2, %zmm0, %zmm1
|
||||
; CHECK-NEXT: vpermt2b %zmm2, %zmm0, %zmm3
|
||||
; CHECK-NEXT: kmovq %rdi, %k1
|
||||
; CHECK-NEXT: vpermt2b %zmm2, %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; CHECK-NEXT: vpermt2b %zmm2, %zmm0, %zmm4 {%k1} {z}
|
||||
; CHECK-NEXT: vpaddb %zmm1, %zmm4, %zmm0
|
||||
; CHECK-NEXT: vpaddb %zmm0, %zmm3, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <64 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
|
||||
%res1 = call <64 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.512(<64 x i8> %x0, <64 x i8> zeroinitializer, <64 x i8> %x2, i64 %x3)
|
||||
%res2 = call <64 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
|
||||
%res3 = add <64 x i8> %res, %res1
|
||||
%res4 = add <64 x i8> %res3, %res2
|
||||
; CHECK-NEXT: vpaddb %zmm3, %zmm4, %zmm0
|
||||
; CHECK-NEXT: vpaddb %zmm0, %zmm1, %zmm0
|
||||
; CHECK-NEXT: ret
|
||||
%1 = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %x1, <64 x i8> %x0, <64 x i8> %x2)
|
||||
%2 = bitcast i64 %x3 to <64 x i1>
|
||||
%3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %x1
|
||||
%4 = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> zeroinitializer, <64 x i8> %x0, <64 x i8> %x2)
|
||||
%5 = bitcast i64 %x3 to <64 x i1>
|
||||
%6 = select <64 x i1> %5, <64 x i8> %4, <64 x i8> zeroinitializer
|
||||
%7 = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %x1, <64 x i8> %x0, <64 x i8> %x2)
|
||||
%res3 = add <64 x i8> %3, %6
|
||||
%res4 = add <64 x i8> %res3, %7
|
||||
ret <64 x i8> %res4
|
||||
}
|
||||
|
||||
declare <64 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
|
||||
|
||||
define <64 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovq %rdi, %k1
|
||||
; CHECK-NEXT: vpermi2b %zmm2, %zmm1, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <64 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
|
||||
ret <64 x i8> %res
|
||||
%1 = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %x1, <64 x i8> %x0, <64 x i8> %x2)
|
||||
%2 = bitcast i64 %x3 to <64 x i1>
|
||||
%3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
|
||||
ret <64 x i8> %3
|
||||
}
|
||||
|
@ -40,3 +40,115 @@ define <32 x i8>@test_int_x86_avx512_mask_permvar_qi_256(<32 x i8> %x0, <32 x i8
|
||||
%res4 = add <32 x i8> %res3, %res2
|
||||
ret <32 x i8> %res4
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
|
||||
|
||||
define <16 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
|
||||
; CHECK-NEXT: vpermt2b %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf2,0x75,0x08,0x7d,0xda]
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermi2b %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x75,0xca]
|
||||
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
|
||||
; CHECK-NEXT: vpermi2b %xmm2, %xmm0, %xmm4 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x75,0xe2]
|
||||
; CHECK-NEXT: vpaddb %xmm3, %xmm4, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfc,0xc3]
|
||||
; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
|
||||
%res1 = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> zeroinitializer, <16 x i8> %x2, i16 %x3)
|
||||
%res2 = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
|
||||
%res3 = add <16 x i8> %res, %res1
|
||||
%res4 = add <16 x i8> %res3, %res2
|
||||
ret <16 x i8> %res4
|
||||
}
|
||||
|
||||
declare <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
|
||||
|
||||
define <32 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
|
||||
; CHECK-NEXT: vpermt2b %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0x75,0x28,0x7d,0xda]
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermi2b %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x75,0xca]
|
||||
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
|
||||
; CHECK-NEXT: vpermi2b %ymm2, %ymm0, %ymm4 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x75,0xe2]
|
||||
; CHECK-NEXT: vpaddb %ymm3, %ymm4, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfc,0xc3]
|
||||
; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
|
||||
%res1 = call <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> zeroinitializer, <32 x i8> %x2, i32 %x3)
|
||||
%res2 = call <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
|
||||
%res3 = add <32 x i8> %res, %res1
|
||||
%res4 = add <32 x i8> %res3, %res2
|
||||
ret <32 x i8> %res4
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
|
||||
|
||||
define <16 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
|
||||
; CHECK-NEXT: vpermt2b %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x7d,0xda]
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermt2b %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7d,0xca]
|
||||
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
|
||||
; CHECK-NEXT: vpermt2b %xmm2, %xmm0, %xmm4 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7d,0xe2]
|
||||
; CHECK-NEXT: vpaddb %xmm3, %xmm4, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfc,0xc3]
|
||||
; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
|
||||
%res1 = call <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> zeroinitializer, <16 x i8> %x2, i16 %x3)
|
||||
%res2 = call <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
|
||||
%res3 = add <16 x i8> %res, %res1
|
||||
%res4 = add <16 x i8> %res3, %res2
|
||||
ret <16 x i8> %res4
|
||||
}
|
||||
|
||||
declare <32 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
|
||||
|
||||
define <32 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
|
||||
; CHECK-NEXT: vpermt2b %ymm2, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0x7d,0x28,0x7d,0xda]
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermt2b %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7d,0xca]
|
||||
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
|
||||
; CHECK-NEXT: vpermt2b %ymm2, %ymm0, %ymm4 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7d,0xe2]
|
||||
; CHECK-NEXT: vpaddb %ymm3, %ymm4, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfc,0xc3]
|
||||
; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
|
||||
%res1 = call <32 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> zeroinitializer, <32 x i8> %x2, i32 %x3)
|
||||
%res2 = call <32 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
|
||||
%res3 = add <32 x i8> %res, %res1
|
||||
%res4 = add <32 x i8> %res3, %res2
|
||||
ret <32 x i8> %res4
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
|
||||
|
||||
define <16 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermi2b %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0x75,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
declare <32 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
|
||||
|
||||
define <32 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermi2b %ymm2, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x75,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <32 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
|
@ -89,114 +89,126 @@ define <32 x i8>@test_int_x86_avx512_mask_pmultishift_qb_256(<32 x i8> %x0, <32
|
||||
ret <32 x i8> %res4
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
|
||||
declare <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>)
|
||||
|
||||
define <16 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
|
||||
; CHECK-NEXT: vpermt2b %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf2,0x75,0x08,0x7d,0xda]
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermi2b %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x75,0xca]
|
||||
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
|
||||
; CHECK-NEXT: vpermi2b %xmm2, %xmm0, %xmm4 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x75,0xe2]
|
||||
; CHECK-NEXT: vpaddb %xmm3, %xmm4, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfc,0xc3]
|
||||
; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
|
||||
%res1 = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> zeroinitializer, <16 x i8> %x2, i16 %x3)
|
||||
%res2 = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
|
||||
%res3 = add <16 x i8> %res, %res1
|
||||
%res4 = add <16 x i8> %res3, %res2
|
||||
%1 = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2)
|
||||
%2 = bitcast i16 %x3 to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %x1
|
||||
%4 = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> zeroinitializer, <16 x i8> %x2)
|
||||
%5 = bitcast i16 %x3 to <16 x i1>
|
||||
%6 = select <16 x i1> %5, <16 x i8> %4, <16 x i8> zeroinitializer
|
||||
%7 = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2)
|
||||
%res3 = add <16 x i8> %3, %6
|
||||
%res4 = add <16 x i8> %res3, %7
|
||||
ret <16 x i8> %res4
|
||||
}
|
||||
|
||||
declare <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
|
||||
declare <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>)
|
||||
|
||||
define <32 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
|
||||
; CHECK-NEXT: vpermt2b %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0x75,0x28,0x7d,0xda]
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermi2b %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x75,0xca]
|
||||
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
|
||||
; CHECK-NEXT: vpermi2b %ymm2, %ymm0, %ymm4 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x75,0xe2]
|
||||
; CHECK-NEXT: vpaddb %ymm3, %ymm4, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfc,0xc3]
|
||||
; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
|
||||
%res1 = call <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> zeroinitializer, <32 x i8> %x2, i32 %x3)
|
||||
%res2 = call <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
|
||||
%res3 = add <32 x i8> %res, %res1
|
||||
%res4 = add <32 x i8> %res3, %res2
|
||||
%1 = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2)
|
||||
%2 = bitcast i32 %x3 to <32 x i1>
|
||||
%3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %x1
|
||||
%4 = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> zeroinitializer, <32 x i8> %x2)
|
||||
%5 = bitcast i32 %x3 to <32 x i1>
|
||||
%6 = select <32 x i1> %5, <32 x i8> %4, <32 x i8> zeroinitializer
|
||||
%7 = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2)
|
||||
%res3 = add <32 x i8> %3, %6
|
||||
%res4 = add <32 x i8> %res3, %7
|
||||
ret <32 x i8> %res4
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
|
||||
|
||||
define <16 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vmovdqa %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
|
||||
; CHECK-NEXT: vpermt2b %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x7d,0xda]
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermt2b %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7d,0xca]
|
||||
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
|
||||
; CHECK-NEXT: vpermt2b %xmm2, %xmm0, %xmm4 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7d,0xe2]
|
||||
; CHECK-NEXT: vpaddb %xmm3, %xmm4, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfc,0xc3]
|
||||
; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
|
||||
%res1 = call <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> zeroinitializer, <16 x i8> %x2, i16 %x3)
|
||||
%res2 = call <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
|
||||
%res3 = add <16 x i8> %res, %res1
|
||||
%res4 = add <16 x i8> %res3, %res2
|
||||
%1 = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %x1, <16 x i8> %x0, <16 x i8> %x2)
|
||||
%2 = bitcast i16 %x3 to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %x1
|
||||
%4 = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> zeroinitializer, <16 x i8> %x0, <16 x i8> %x2)
|
||||
%5 = bitcast i16 %x3 to <16 x i1>
|
||||
%6 = select <16 x i1> %5, <16 x i8> %4, <16 x i8> zeroinitializer
|
||||
%7 = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %x1, <16 x i8> %x0, <16 x i8> %x2)
|
||||
%res3 = add <16 x i8> %3, %6
|
||||
%res4 = add <16 x i8> %res3, %7
|
||||
ret <16 x i8> %res4
|
||||
}
|
||||
|
||||
declare <32 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
|
||||
|
||||
define <32 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vmovdqa %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
|
||||
; CHECK-NEXT: vpermt2b %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7d,0xda]
|
||||
; CHECK-NEXT: vpermt2b %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0x7d,0xca]
|
||||
; CHECK-NEXT: vpermt2b %ymm2, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0x7d,0x28,0x7d,0xda]
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermt2b %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7d,0xca]
|
||||
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
|
||||
; CHECK-NEXT: vpermt2b %ymm2, %ymm0, %ymm4 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7d,0xe2]
|
||||
; CHECK-NEXT: vpaddb %ymm1, %ymm4, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfc,0xc1]
|
||||
; CHECK-NEXT: vpaddb %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfc,0xc0]
|
||||
; CHECK-NEXT: vpaddb %ymm3, %ymm4, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfc,0xc3]
|
||||
; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
|
||||
%res1 = call <32 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> zeroinitializer, <32 x i8> %x2, i32 %x3)
|
||||
%res2 = call <32 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
|
||||
%res3 = add <32 x i8> %res, %res1
|
||||
%res4 = add <32 x i8> %res3, %res2
|
||||
%1 = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %x1, <32 x i8> %x0, <32 x i8> %x2)
|
||||
%2 = bitcast i32 %x3 to <32 x i1>
|
||||
%3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %x1
|
||||
%4 = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> zeroinitializer, <32 x i8> %x0, <32 x i8> %x2)
|
||||
%5 = bitcast i32 %x3 to <32 x i1>
|
||||
%6 = select <32 x i1> %5, <32 x i8> %4, <32 x i8> zeroinitializer
|
||||
%7 = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %x1, <32 x i8> %x0, <32 x i8> %x2)
|
||||
%res3 = add <32 x i8> %3, %6
|
||||
%res4 = add <32 x i8> %res3, %7
|
||||
ret <32 x i8> %res4
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
|
||||
|
||||
define <16 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermi2b %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0x75,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
|
||||
ret <16 x i8> %res
|
||||
%1 = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %x1, <16 x i8> %x0, <16 x i8> %x2)
|
||||
%2 = bitcast i16 %x3 to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
|
||||
ret <16 x i8> %3
|
||||
}
|
||||
|
||||
declare <32 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
|
||||
|
||||
define <32 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermi2b %ymm2, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x75,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <32 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
|
||||
ret <32 x i8> %res
|
||||
%1 = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %x1, <32 x i8> %x0, <32 x i8> %x2)
|
||||
%2 = bitcast i32 %x3 to <32 x i1>
|
||||
%3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
|
||||
ret <32 x i8> %3
|
||||
}
|
||||
|
@ -6985,3 +6985,287 @@ define <8 x float>@test_int_x86_avx512_mask_cvt_udq2ps_256(<8 x i32> %x0, <8 x f
|
||||
%res2 = fadd <8 x float> %res, %res1
|
||||
ret <8 x float> %res2
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.x86.avx512.mask.vpermi2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
|
||||
|
||||
define <4 x i32>@test_int_x86_avx512_mask_vpermi2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_d_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
|
||||
; CHECK-NEXT: vpermt2d %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf2,0x75,0x08,0x7e,0xda]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermi2d %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x76,0xca]
|
||||
; CHECK-NEXT: vpaddd %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.vpermi2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
|
||||
%res1 = call <4 x i32> @llvm.x86.avx512.mask.vpermi2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
|
||||
%res2 = add <4 x i32> %res, %res1
|
||||
ret <4 x i32> %res2
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
|
||||
|
||||
define <4 x i32>@test_int_x86_avx512_mask_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
|
||||
; CHECK-NEXT: vpermt2d %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xda]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermt2d %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7e,0xca]
|
||||
; CHECK-NEXT: vpaddd %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
|
||||
%res1 = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
|
||||
%res2 = add <4 x i32> %res, %res1
|
||||
ret <4 x i32> %res2
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
|
||||
|
||||
define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
|
||||
; CHECK-NEXT: vpermt2d %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xda]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermt2d %xmm2, %xmm0, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7e,0xca]
|
||||
; CHECK-NEXT: vpaddd %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
|
||||
%res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
|
||||
%res2 = add <4 x i32> %res, %res1
|
||||
ret <4 x i32> %res2
|
||||
}
|
||||
|
||||
declare <8 x i32> @llvm.x86.avx512.mask.vpermi2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
|
||||
|
||||
define <8 x i32>@test_int_x86_avx512_mask_vpermi2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_d_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
|
||||
; CHECK-NEXT: vpermt2d %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0x75,0x28,0x7e,0xda]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermi2d %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x76,0xca]
|
||||
; CHECK-NEXT: vpaddd %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.vpermi2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
|
||||
%res1 = call <8 x i32> @llvm.x86.avx512.mask.vpermi2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
|
||||
%res2 = add <8 x i32> %res, %res1
|
||||
ret <8 x i32> %res2
|
||||
}
|
||||
|
||||
declare <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
|
||||
|
||||
define <8 x i32>@test_int_x86_avx512_mask_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
|
||||
; CHECK-NEXT: vpermt2d %ymm2, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xda]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermt2d %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7e,0xca]
|
||||
; CHECK-NEXT: vpaddd %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
|
||||
%res1 = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
|
||||
%res2 = add <8 x i32> %res, %res1
|
||||
ret <8 x i32> %res2
|
||||
}
|
||||
|
||||
declare <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
|
||||
|
||||
define <8 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
|
||||
; CHECK-NEXT: vpermt2d %ymm2, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xda]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermt2d %ymm2, %ymm0, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7e,0xca]
|
||||
; CHECK-NEXT: vpaddd %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
|
||||
%res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
|
||||
%res2 = add <8 x i32> %res, %res1
|
||||
ret <8 x i32> %res2
|
||||
}
|
||||
|
||||
declare <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8)
|
||||
|
||||
define <2 x double>@test_int_x86_avx512_mask_vpermi2var_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovapd %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8]
|
||||
; CHECK-NEXT: vpermt2pd %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf2,0xf5,0x08,0x7f,0xda]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermi2pd %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x77,0xca]
|
||||
; CHECK-NEXT: vaddpd %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3)
|
||||
%res1 = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1)
|
||||
%res2 = fadd <2 x double> %res, %res1
|
||||
ret <2 x double> %res2
|
||||
}
|
||||
|
||||
declare <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8)
|
||||
|
||||
define <4 x double>@test_int_x86_avx512_mask_vpermi2var_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovapd %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
|
||||
; CHECK-NEXT: vpermt2pd %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0xf5,0x28,0x7f,0xda]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermi2pd %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x77,0xca]
|
||||
; CHECK-NEXT: vaddpd %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
|
||||
%res1 = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
|
||||
%res2 = fadd <4 x double> %res, %res1
|
||||
ret <4 x double> %res2
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8)
|
||||
|
||||
define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
|
||||
; CHECK-NEXT: vpermt2ps %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf2,0x75,0x08,0x7f,0xda]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
|
||||
; CHECK-NEXT: vaddps %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3)
|
||||
%res1 = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1)
|
||||
%res2 = fadd <4 x float> %res, %res1
|
||||
ret <4 x float> %res2
|
||||
}
|
||||
|
||||
define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128_cast(<4 x float> %x0, <2 x i64> %x1, <4 x float> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128_cast:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
|
||||
; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%x1cast = bitcast <2 x i64> %x1 to <4 x i32>
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1cast, <4 x float> %x2, i8 %x3)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
declare <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8)
|
||||
|
||||
define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
|
||||
; CHECK-NEXT: vpermt2ps %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0x75,0x28,0x7f,0xda]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermi2ps %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x77,0xca]
|
||||
; CHECK-NEXT: vaddps %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
|
||||
%res1 = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
|
||||
%res2 = fadd <8 x float> %res, %res1
|
||||
ret <8 x float> %res2
|
||||
}
|
||||
|
||||
declare <2 x i64> @llvm.x86.avx512.mask.vpermi2var.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
|
||||
|
||||
define <2 x i64>@test_int_x86_avx512_mask_vpermi2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_q_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
|
||||
; CHECK-NEXT: vpermt2q %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf2,0xf5,0x08,0x7e,0xda]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermi2q %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x76,0xca]
|
||||
; CHECK-NEXT: vpaddq %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.vpermi2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
|
||||
%res1 = call <2 x i64> @llvm.x86.avx512.mask.vpermi2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
|
||||
%res2 = add <2 x i64> %res, %res1
|
||||
ret <2 x i64> %res2
|
||||
}
|
||||
|
||||
declare <2 x i64> @llvm.x86.avx512.mask.vpermt2var.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
|
||||
|
||||
define <2 x i64>@test_int_x86_avx512_mask_vpermt2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_q_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
|
||||
; CHECK-NEXT: vpermt2q %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x7e,0xda]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermt2q %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x7e,0xca]
|
||||
; CHECK-NEXT: vpaddq %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.vpermt2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
|
||||
%res1 = call <2 x i64> @llvm.x86.avx512.mask.vpermt2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
|
||||
%res2 = add <2 x i64> %res, %res1
|
||||
ret <2 x i64> %res2
|
||||
}
|
||||
|
||||
declare <2 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
|
||||
|
||||
define <2 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
|
||||
; CHECK-NEXT: vpermt2q %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x7e,0xda]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermt2q %xmm2, %xmm0, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x7e,0xca]
|
||||
; CHECK-NEXT: vpaddq %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <2 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
|
||||
%res1 = call <2 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
|
||||
%res2 = add <2 x i64> %res, %res1
|
||||
ret <2 x i64> %res2
|
||||
}
|
||||
|
||||
declare <4 x i64> @llvm.x86.avx512.mask.vpermi2var.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
|
||||
|
||||
define <4 x i64>@test_int_x86_avx512_mask_vpermi2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_q_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
|
||||
; CHECK-NEXT: vpermt2q %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0xf5,0x28,0x7e,0xda]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermi2q %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x76,0xca]
|
||||
; CHECK-NEXT: vpaddq %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i64> @llvm.x86.avx512.mask.vpermi2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
|
||||
%res1 = call <4 x i64> @llvm.x86.avx512.mask.vpermi2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
|
||||
%res2 = add <4 x i64> %res, %res1
|
||||
ret <4 x i64> %res2
|
||||
}
|
||||
|
||||
declare <4 x i64> @llvm.x86.avx512.mask.vpermt2var.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
|
||||
|
||||
define <4 x i64>@test_int_x86_avx512_mask_vpermt2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_q_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
|
||||
; CHECK-NEXT: vpermt2q %ymm2, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x7e,0xda]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermt2q %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x7e,0xca]
|
||||
; CHECK-NEXT: vpaddq %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i64> @llvm.x86.avx512.mask.vpermt2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
|
||||
%res1 = call <4 x i64> @llvm.x86.avx512.mask.vpermt2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
|
||||
%res2 = add <4 x i64> %res, %res1
|
||||
ret <4 x i64> %res2
|
||||
}
|
||||
|
||||
declare <4 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
|
||||
|
||||
define <4 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
|
||||
; CHECK-NEXT: vpermt2q %ymm2, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x7e,0xda]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermt2q %ymm2, %ymm0, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x7e,0xca]
|
||||
; CHECK-NEXT: vpaddq %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
|
||||
%res1 = call <4 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
|
||||
%res2 = add <4 x i64> %res, %res1
|
||||
ret <4 x i64> %res2
|
||||
}
|
||||
|
@ -528,122 +528,178 @@ define <8 x float> @test_getexp_ps_256(<8 x float> %a0) {
|
||||
}
|
||||
declare <8 x float> @llvm.x86.avx512.mask.getexp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
|
||||
|
||||
declare <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
|
||||
declare <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>)
|
||||
|
||||
define <4 x i32>@test_int_x86_avx512_mask_vpermi2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_d_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
|
||||
; CHECK-NEXT: vpermt2d %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf2,0x75,0x08,0x7e,0xda]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermi2d %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x76,0xca]
|
||||
; CHECK-NEXT: vpaddd %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%1 = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
|
||||
%2 = bitcast i8 %x3 to <8 x i1>
|
||||
%extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x1
|
||||
%4 = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
|
||||
%res2 = add <4 x i32> %3, %4
|
||||
ret <4 x i32> %res2
|
||||
}
|
||||
|
||||
define <4 x i32>@test_int_x86_avx512_mask_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vmovdqa %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
|
||||
; CHECK-NEXT: vpermt2d %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xda]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermt2d %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7e,0xca]
|
||||
; CHECK-NEXT: vpaddd %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
|
||||
%res1 = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
|
||||
%res2 = add <4 x i32> %res, %res1
|
||||
%1 = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> %x2)
|
||||
%2 = bitcast i8 %x3 to <8 x i1>
|
||||
%extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x1
|
||||
%4 = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> %x2)
|
||||
%res2 = add <4 x i32> %3, %4
|
||||
ret <4 x i32> %res2
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
|
||||
|
||||
define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vmovdqa %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
|
||||
; CHECK-NEXT: vpermt2d %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xda]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermt2d %xmm2, %xmm0, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7e,0xca]
|
||||
; CHECK-NEXT: vpaddd %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
|
||||
%res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
|
||||
%res2 = add <4 x i32> %res, %res1
|
||||
%1 = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> %x2)
|
||||
%2 = bitcast i8 %x3 to <8 x i1>
|
||||
%extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> zeroinitializer
|
||||
%4 = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> %x2)
|
||||
%res2 = add <4 x i32> %3, %4
|
||||
ret <4 x i32> %res2
|
||||
}
|
||||
|
||||
declare <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
|
||||
declare <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>)
|
||||
|
||||
define <8 x i32>@test_int_x86_avx512_mask_vpermi2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_d_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
|
||||
; CHECK-NEXT: vpermt2d %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0x75,0x28,0x7e,0xda]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermi2d %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x76,0xca]
|
||||
; CHECK-NEXT: vpaddd %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%1 = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
|
||||
%2 = bitcast i8 %x3 to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x1
|
||||
%4 = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
|
||||
%res2 = add <8 x i32> %3, %4
|
||||
ret <8 x i32> %res2
|
||||
}
|
||||
|
||||
define <8 x i32>@test_int_x86_avx512_mask_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vmovdqa %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
|
||||
; CHECK-NEXT: vpermt2d %ymm2, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xda]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermt2d %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7e,0xca]
|
||||
; CHECK-NEXT: vpaddd %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
|
||||
%res1 = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
|
||||
%res2 = add <8 x i32> %res, %res1
|
||||
%1 = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> %x2)
|
||||
%2 = bitcast i8 %x3 to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x1
|
||||
%4 = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> %x2)
|
||||
%res2 = add <8 x i32> %3, %4
|
||||
ret <8 x i32> %res2
|
||||
}
|
||||
|
||||
declare <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
|
||||
|
||||
define <8 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vmovdqa %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
|
||||
; CHECK-NEXT: vpermt2d %ymm2, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xda]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermt2d %ymm2, %ymm0, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7e,0xca]
|
||||
; CHECK-NEXT: vpaddd %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
|
||||
%res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
|
||||
%res2 = add <8 x i32> %res, %res1
|
||||
%1 = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> %x2)
|
||||
%2 = bitcast i8 %x3 to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer
|
||||
%4 = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> %x2)
|
||||
%res2 = add <8 x i32> %3, %4
|
||||
ret <8 x i32> %res2
|
||||
}
|
||||
|
||||
declare <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8)
|
||||
declare <2 x double> @llvm.x86.avx512.vpermi2var.pd.128(<2 x double>, <2 x i64>, <2 x double>)
|
||||
|
||||
define <2 x double>@test_int_x86_avx512_mask_vpermi2var_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vmovapd %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8]
|
||||
; CHECK-NEXT: vpermt2pd %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf2,0xf5,0x08,0x7f,0xda]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermi2pd %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x77,0xca]
|
||||
; CHECK-NEXT: vaddpd %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3)
|
||||
%res1 = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1)
|
||||
%res2 = fadd <2 x double> %res, %res1
|
||||
%1 = call <2 x double> @llvm.x86.avx512.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2)
|
||||
%2 = bitcast <2 x i64> %x1 to <2 x double>
|
||||
%3 = bitcast i8 %x3 to <8 x i1>
|
||||
%extract = shufflevector <8 x i1> %3, <8 x i1> %3, <2 x i32> <i32 0, i32 1>
|
||||
%4 = select <2 x i1> %extract, <2 x double> %1, <2 x double> %2
|
||||
%5 = call <2 x double> @llvm.x86.avx512.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2)
|
||||
%6 = bitcast <2 x i64> %x1 to <2 x double>
|
||||
%res2 = fadd <2 x double> %4, %5
|
||||
ret <2 x double> %res2
|
||||
}
|
||||
|
||||
declare <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8)
|
||||
declare <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double>, <4 x i64>, <4 x double>)
|
||||
|
||||
define <4 x double>@test_int_x86_avx512_mask_vpermi2var_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vmovapd %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
|
||||
; CHECK-NEXT: vpermt2pd %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0xf5,0x28,0x7f,0xda]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermi2pd %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x77,0xca]
|
||||
; CHECK-NEXT: vaddpd %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
|
||||
%res1 = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
|
||||
%res2 = fadd <4 x double> %res, %res1
|
||||
%1 = call <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2)
|
||||
%2 = bitcast <4 x i64> %x1 to <4 x double>
|
||||
%3 = bitcast i8 %x3 to <8 x i1>
|
||||
%extract = shufflevector <8 x i1> %3, <8 x i1> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%4 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %2
|
||||
%5 = call <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2)
|
||||
%6 = bitcast <4 x i64> %x1 to <4 x double>
|
||||
%res2 = fadd <4 x double> %4, %5
|
||||
ret <4 x double> %res2
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8)
|
||||
declare <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float>, <4 x i32>, <4 x float>)
|
||||
|
||||
define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
|
||||
; CHECK-NEXT: vpermt2ps %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf2,0x75,0x08,0x7f,0xda]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
|
||||
; CHECK-NEXT: vaddps %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3)
|
||||
%res1 = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1)
|
||||
%res2 = fadd <4 x float> %res, %res1
|
||||
%1 = call <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2)
|
||||
%2 = bitcast <4 x i32> %x1 to <4 x float>
|
||||
%3 = bitcast i8 %x3 to <8 x i1>
|
||||
%extract = shufflevector <8 x i1> %3, <8 x i1> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%4 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %2
|
||||
%5 = call <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2)
|
||||
%6 = bitcast <4 x i32> %x1 to <4 x float>
|
||||
%res2 = fadd <4 x float> %4, %5
|
||||
ret <4 x float> %res2
|
||||
}
|
||||
|
||||
@ -655,27 +711,147 @@ define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128_cast(<4 x float> %
|
||||
; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%x1cast = bitcast <2 x i64> %x1 to <4 x i32>
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1cast, <4 x float> %x2, i8 %x3)
|
||||
ret <4 x float> %res
|
||||
%1 = call <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1cast, <4 x float> %x2)
|
||||
%2 = bitcast <4 x i32> %x1cast to <4 x float>
|
||||
%3 = bitcast i8 %x3 to <8 x i1>
|
||||
%extract = shufflevector <8 x i1> %3, <8 x i1> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%4 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %2
|
||||
ret <4 x float> %4
|
||||
}
|
||||
|
||||
declare <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8)
|
||||
declare <8 x float> @llvm.x86.avx512.vpermi2var.ps.256(<8 x float>, <8 x i32>, <8 x float>)
|
||||
|
||||
define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
|
||||
; CHECK-NEXT: vpermt2ps %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0x75,0x28,0x7f,0xda]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermi2ps %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x77,0xca]
|
||||
; CHECK-NEXT: vaddps %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
|
||||
%res1 = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
|
||||
%res2 = fadd <8 x float> %res, %res1
|
||||
%1 = call <8 x float> @llvm.x86.avx512.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2)
|
||||
%2 = bitcast <8 x i32> %x1 to <8 x float>
|
||||
%3 = bitcast i8 %x3 to <8 x i1>
|
||||
%4 = select <8 x i1> %3, <8 x float> %1, <8 x float> %2
|
||||
%5 = call <8 x float> @llvm.x86.avx512.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2)
|
||||
%6 = bitcast <8 x i32> %x1 to <8 x float>
|
||||
%res2 = fadd <8 x float> %4, %5
|
||||
ret <8 x float> %res2
|
||||
}
|
||||
|
||||
declare <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64>, <2 x i64>, <2 x i64>)
|
||||
|
||||
define <2 x i64>@test_int_x86_avx512_mask_vpermi2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_q_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
|
||||
; CHECK-NEXT: vpermt2q %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf2,0xf5,0x08,0x7e,0xda]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermi2q %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x76,0xca]
|
||||
; CHECK-NEXT: vpaddq %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%1 = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
|
||||
%2 = bitcast i8 %x3 to <8 x i1>
|
||||
%extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
|
||||
%3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x1
|
||||
%4 = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
|
||||
%res2 = add <2 x i64> %3, %4
|
||||
ret <2 x i64> %res2
|
||||
}
|
||||
|
||||
define <2 x i64>@test_int_x86_avx512_mask_vpermt2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_q_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
|
||||
; CHECK-NEXT: vpermt2q %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x7e,0xda]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermt2q %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x7e,0xca]
|
||||
; CHECK-NEXT: vpaddq %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%1 = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> %x2)
|
||||
%2 = bitcast i8 %x3 to <8 x i1>
|
||||
%extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
|
||||
%3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x1
|
||||
%4 = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> %x2)
|
||||
%res2 = add <2 x i64> %3, %4
|
||||
ret <2 x i64> %res2
|
||||
}
|
||||
|
||||
define <2 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
|
||||
; CHECK-NEXT: vpermt2q %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x7e,0xda]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermt2q %xmm2, %xmm0, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x7e,0xca]
|
||||
; CHECK-NEXT: vpaddq %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%1 = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> %x2)
|
||||
%2 = bitcast i8 %x3 to <8 x i1>
|
||||
%extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
|
||||
%3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> zeroinitializer
|
||||
%4 = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> %x2)
|
||||
%res2 = add <2 x i64> %3, %4
|
||||
ret <2 x i64> %res2
|
||||
}
|
||||
|
||||
declare <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64>, <4 x i64>, <4 x i64>)
|
||||
|
||||
define <4 x i64>@test_int_x86_avx512_mask_vpermi2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_q_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
|
||||
; CHECK-NEXT: vpermt2q %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0xf5,0x28,0x7e,0xda]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermi2q %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x76,0xca]
|
||||
; CHECK-NEXT: vpaddq %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%1 = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
|
||||
%2 = bitcast i8 %x3 to <8 x i1>
|
||||
%extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x1
|
||||
%4 = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
|
||||
%res2 = add <4 x i64> %3, %4
|
||||
ret <4 x i64> %res2
|
||||
}
|
||||
|
||||
define <4 x i64>@test_int_x86_avx512_mask_vpermt2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_q_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
|
||||
; CHECK-NEXT: vpermt2q %ymm2, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x7e,0xda]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermt2q %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x7e,0xca]
|
||||
; CHECK-NEXT: vpaddq %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%1 = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> %x2)
|
||||
%2 = bitcast i8 %x3 to <8 x i1>
|
||||
%extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x1
|
||||
%4 = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> %x2)
|
||||
%res2 = add <4 x i64> %3, %4
|
||||
ret <4 x i64> %res2
|
||||
}
|
||||
|
||||
define <4 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
|
||||
; CHECK-NEXT: vpermt2q %ymm2, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x7e,0xda]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermt2q %ymm2, %ymm0, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x7e,0xca]
|
||||
; CHECK-NEXT: vpaddq %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%1 = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> %x2)
|
||||
%2 = bitcast i8 %x3 to <8 x i1>
|
||||
%extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> zeroinitializer
|
||||
%4 = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> %x2)
|
||||
%res2 = add <4 x i64> %3, %4
|
||||
ret <4 x i64> %res2
|
||||
}
|
||||
|
||||
declare <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
|
||||
|
||||
define <2 x double>@test_int_x86_avx512_mask_scalef_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
|
||||
|
@ -124,9 +124,9 @@ define <8 x double> @combine_vpermt2var_8f64_identity(<8 x double> %x0, <8 x dou
|
||||
define <8 x double> @combine_vpermt2var_8f64_identity_mask(<8 x double> %x0, <8 x double> %x1, i8 %m) {
|
||||
; X32-LABEL: combine_vpermt2var_8f64_identity_mask:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: vmovapd {{.*#+}} zmm2 = [7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0]
|
||||
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: kmovd %eax, %k1
|
||||
; X32-NEXT: vmovapd {{.*#+}} zmm2 = [7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0]
|
||||
; X32-NEXT: vpermi2pd %zmm1, %zmm0, %zmm2 {%k1} {z}
|
||||
; X32-NEXT: vmovapd {{.*#+}} zmm0 = [7,0,14,0,5,0,12,0,3,0,10,0,1,0,8,0]
|
||||
; X32-NEXT: vpermi2pd %zmm2, %zmm2, %zmm0 {%k1} {z}
|
||||
@ -134,8 +134,8 @@ define <8 x double> @combine_vpermt2var_8f64_identity_mask(<8 x double> %x0, <8
|
||||
;
|
||||
; X64-LABEL: combine_vpermt2var_8f64_identity_mask:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1
|
||||
; X64-NEXT: vmovapd {{.*#+}} zmm2 = [7,6,5,4,3,2,1,0]
|
||||
; X64-NEXT: kmovd %edi, %k1
|
||||
; X64-NEXT: vpermi2pd %zmm1, %zmm0, %zmm2 {%k1} {z}
|
||||
; X64-NEXT: vmovapd {{.*#+}} zmm0 = [7,14,5,12,3,10,1,8]
|
||||
; X64-NEXT: vpermi2pd %zmm2, %zmm2, %zmm0 {%k1} {z}
|
||||
@ -205,9 +205,9 @@ define <8 x i64> @combine_vpermt2var_8i64_identity(<8 x i64> %x0, <8 x i64> %x1)
|
||||
define <8 x i64> @combine_vpermt2var_8i64_identity_mask(<8 x i64> %x0, <8 x i64> %x1, i8 %m) {
|
||||
; X32-LABEL: combine_vpermt2var_8i64_identity_mask:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0]
|
||||
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: kmovd %eax, %k1
|
||||
; X32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0]
|
||||
; X32-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 {%k1} {z}
|
||||
; X32-NEXT: vmovdqa64 {{.*#+}} zmm0 = [7,0,14,0,5,0,12,0,3,0,10,0,1,0,8,0]
|
||||
; X32-NEXT: vpermi2q %zmm2, %zmm2, %zmm0 {%k1} {z}
|
||||
@ -215,8 +215,8 @@ define <8 x i64> @combine_vpermt2var_8i64_identity_mask(<8 x i64> %x0, <8 x i64>
|
||||
;
|
||||
; X64-LABEL: combine_vpermt2var_8i64_identity_mask:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1
|
||||
; X64-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,6,5,4,3,2,1,0]
|
||||
; X64-NEXT: kmovd %edi, %k1
|
||||
; X64-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 {%k1} {z}
|
||||
; X64-NEXT: vmovdqa64 {{.*#+}} zmm0 = [7,14,5,12,3,10,1,8]
|
||||
; X64-NEXT: vpermi2q %zmm2, %zmm2, %zmm0 {%k1} {z}
|
||||
@ -241,8 +241,8 @@ define <16 x float> @combine_vpermt2var_16f32_identity(<16 x float> %x0, <16 x f
|
||||
define <16 x float> @combine_vpermt2var_16f32_identity_mask(<16 x float> %x0, <16 x float> %x1, i16 %m) {
|
||||
; X32-LABEL: combine_vpermt2var_16f32_identity_mask:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
|
||||
; X32-NEXT: vmovaps {{.*#+}} zmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
|
||||
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
|
||||
; X32-NEXT: vpermi2ps %zmm1, %zmm0, %zmm2 {%k1} {z}
|
||||
; X32-NEXT: vmovaps {{.*#+}} zmm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
|
||||
; X32-NEXT: vpermi2ps %zmm2, %zmm2, %zmm0 {%k1} {z}
|
||||
@ -250,8 +250,8 @@ define <16 x float> @combine_vpermt2var_16f32_identity_mask(<16 x float> %x0, <1
|
||||
;
|
||||
; X64-LABEL: combine_vpermt2var_16f32_identity_mask:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1
|
||||
; X64-NEXT: vmovaps {{.*#+}} zmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
|
||||
; X64-NEXT: kmovd %edi, %k1
|
||||
; X64-NEXT: vpermi2ps %zmm1, %zmm0, %zmm2 {%k1} {z}
|
||||
; X64-NEXT: vmovaps {{.*#+}} zmm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
|
||||
; X64-NEXT: vpermi2ps %zmm2, %zmm2, %zmm0 {%k1} {z}
|
||||
@ -300,15 +300,15 @@ define <16 x float> @combine_vpermt2var_16f32_vmovddup_load(<16 x float> *%p0, <
|
||||
define <16 x float> @combine_vpermt2var_16f32_vmovddup_mask(<16 x float> %x0, <16 x float> %x1, i16 %m) {
|
||||
; X32-LABEL: combine_vpermt2var_16f32_vmovddup_mask:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
|
||||
; X32-NEXT: vmovaps {{.*#+}} zmm2 = [0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13]
|
||||
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
|
||||
; X32-NEXT: vpermt2ps %zmm1, %zmm2, %zmm0 {%k1} {z}
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: combine_vpermt2var_16f32_vmovddup_mask:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1
|
||||
; X64-NEXT: vmovaps {{.*#+}} zmm2 = [0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13]
|
||||
; X64-NEXT: kmovd %edi, %k1
|
||||
; X64-NEXT: vpermt2ps %zmm1, %zmm2, %zmm0 {%k1} {z}
|
||||
; X64-NEXT: retq
|
||||
%res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5, i32 8, i32 9, i32 8, i32 9, i32 12, i32 13, i32 12, i32 13>, <16 x float> %x0, <16 x float> %x1, i16 %m)
|
||||
@ -317,19 +317,19 @@ define <16 x float> @combine_vpermt2var_16f32_vmovddup_mask(<16 x float> %x0, <1
|
||||
define <16 x float> @combine_vpermt2var_16f32_vmovddup_mask_load(<16 x float> *%p0, <16 x float> %x1, i16 %m) {
|
||||
; X32-LABEL: combine_vpermt2var_16f32_vmovddup_mask_load:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vmovaps (%eax), %zmm2
|
||||
; X32-NEXT: vmovaps {{.*#+}} zmm1 = [0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13]
|
||||
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
|
||||
; X32-NEXT: vpermi2ps %zmm0, %zmm2, %zmm1 {%k1} {z}
|
||||
; X32-NEXT: vmovaps %zmm1, %zmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: combine_vpermt2var_16f32_vmovddup_mask_load:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %esi, %k1
|
||||
; X64-NEXT: vmovaps (%rdi), %zmm2
|
||||
; X64-NEXT: vmovaps {{.*#+}} zmm1 = [0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13]
|
||||
; X64-NEXT: kmovd %esi, %k1
|
||||
; X64-NEXT: vpermi2ps %zmm0, %zmm2, %zmm1 {%k1} {z}
|
||||
; X64-NEXT: vmovaps %zmm1, %zmm0
|
||||
; X64-NEXT: retq
|
||||
@ -519,8 +519,8 @@ define <16 x i32> @combine_vpermt2var_16i32_identity(<16 x i32> %x0, <16 x i32>
|
||||
define <16 x i32> @combine_vpermt2var_16i32_identity_mask(<16 x i32> %x0, <16 x i32> %x1, i16 %m) {
|
||||
; X32-LABEL: combine_vpermt2var_16i32_identity_mask:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
|
||||
; X32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
|
||||
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
|
||||
; X32-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 {%k1} {z}
|
||||
; X32-NEXT: vmovdqa64 {{.*#+}} zmm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
|
||||
; X32-NEXT: vpermi2d %zmm2, %zmm2, %zmm0 {%k1} {z}
|
||||
@ -528,8 +528,8 @@ define <16 x i32> @combine_vpermt2var_16i32_identity_mask(<16 x i32> %x0, <16 x
|
||||
;
|
||||
; X64-LABEL: combine_vpermt2var_16i32_identity_mask:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1
|
||||
; X64-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
|
||||
; X64-NEXT: kmovd %edi, %k1
|
||||
; X64-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 {%k1} {z}
|
||||
; X64-NEXT: vmovdqa64 {{.*#+}} zmm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
|
||||
; X64-NEXT: vpermi2d %zmm2, %zmm2, %zmm0 {%k1} {z}
|
||||
@ -554,8 +554,8 @@ define <32 x i16> @combine_vpermt2var_32i16_identity(<32 x i16> %x0, <32 x i16>
|
||||
define <32 x i16> @combine_vpermt2var_32i16_identity_mask(<32 x i16> %x0, <32 x i16> %x1, i32 %m) {
|
||||
; X32-LABEL: combine_vpermt2var_32i16_identity_mask:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; X32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
|
||||
; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; X32-NEXT: vpermi2w %zmm1, %zmm0, %zmm2 {%k1} {z}
|
||||
; X32-NEXT: vmovdqa64 {{.*#+}} zmm0 = [63,30,61,28,59,26,57,24,55,22,53,20,51,18,49,16,47,46,13,44,11,42,9,40,7,38,5,36,3,34,1,32]
|
||||
; X32-NEXT: vpermi2w %zmm2, %zmm2, %zmm0 {%k1} {z}
|
||||
@ -563,8 +563,8 @@ define <32 x i16> @combine_vpermt2var_32i16_identity_mask(<32 x i16> %x0, <32 x
|
||||
;
|
||||
; X64-LABEL: combine_vpermt2var_32i16_identity_mask:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1
|
||||
; X64-NEXT: vmovdqa64 {{.*#+}} zmm2 = [31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
|
||||
; X64-NEXT: kmovd %edi, %k1
|
||||
; X64-NEXT: vpermi2w %zmm1, %zmm0, %zmm2 {%k1} {z}
|
||||
; X64-NEXT: vmovdqa64 {{.*#+}} zmm0 = [63,30,61,28,59,26,57,24,55,22,53,20,51,18,49,16,47,46,13,44,11,42,9,40,7,38,5,36,3,34,1,32]
|
||||
; X64-NEXT: vpermi2w %zmm2, %zmm2, %zmm0 {%k1} {z}
|
||||
|
@ -20,8 +20,8 @@ define <16 x i16> @combine_vpermt2var_16i16_identity(<16 x i16> %x0, <16 x i16>
|
||||
define <16 x i16> @combine_vpermt2var_16i16_identity_mask(<16 x i16> %x0, <16 x i16> %x1, i16 %m) {
|
||||
; X32-LABEL: combine_vpermt2var_16i16_identity_mask:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
|
||||
; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
|
||||
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
|
||||
; X32-NEXT: vpermi2w %ymm1, %ymm0, %ymm2 {%k1} {z}
|
||||
; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
|
||||
; X32-NEXT: vpermi2w %ymm2, %ymm2, %ymm0 {%k1} {z}
|
||||
@ -29,8 +29,8 @@ define <16 x i16> @combine_vpermt2var_16i16_identity_mask(<16 x i16> %x0, <16 x
|
||||
;
|
||||
; X64-LABEL: combine_vpermt2var_16i16_identity_mask:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1
|
||||
; X64-NEXT: vmovdqa {{.*#+}} ymm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
|
||||
; X64-NEXT: kmovd %edi, %k1
|
||||
; X64-NEXT: vpermi2w %ymm1, %ymm0, %ymm2 {%k1} {z}
|
||||
; X64-NEXT: vmovdqa {{.*#+}} ymm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
|
||||
; X64-NEXT: vpermi2w %ymm2, %ymm2, %ymm0 {%k1} {z}
|
||||
|
@ -33,8 +33,8 @@ define <16 x i8> @combine_vpermt2var_16i8_identity(<16 x i8> %x0, <16 x i8> %x1)
|
||||
define <16 x i8> @combine_vpermt2var_16i8_identity_mask(<16 x i8> %x0, <16 x i8> %x1, i16 %m) {
|
||||
; X32-LABEL: combine_vpermt2var_16i8_identity_mask:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
|
||||
; X32-NEXT: vmovdqa {{.*#+}} xmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
|
||||
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
|
||||
; X32-NEXT: vpermi2b %xmm1, %xmm0, %xmm2 {%k1} {z}
|
||||
; X32-NEXT: vmovdqa {{.*#+}} xmm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
|
||||
; X32-NEXT: vpermi2b %xmm2, %xmm2, %xmm0 {%k1} {z}
|
||||
@ -42,8 +42,8 @@ define <16 x i8> @combine_vpermt2var_16i8_identity_mask(<16 x i8> %x0, <16 x i8>
|
||||
;
|
||||
; X64-LABEL: combine_vpermt2var_16i8_identity_mask:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1
|
||||
; X64-NEXT: vmovdqa {{.*#+}} xmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
|
||||
; X64-NEXT: kmovd %edi, %k1
|
||||
; X64-NEXT: vpermi2b %xmm1, %xmm0, %xmm2 {%k1} {z}
|
||||
; X64-NEXT: vmovdqa {{.*#+}} xmm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
|
||||
; X64-NEXT: vpermi2b %xmm2, %xmm2, %xmm0 {%k1} {z}
|
||||
|
Loading…
x
Reference in New Issue
Block a user