mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[X86][XOP] Support for VPERMIL2PD/VPERMIL2PS 2-input shuffle instructions
This patch begins adding support for lowering to the XOP VPERMIL2PD/VPERMIL2PS shuffle instructions - adding the X86ISD::VPERMIL2 opcode and cleaning up the usage. The internal llvm intrinsics were assuming the shuffle mask operand was the same type as the float/double input operands (I guess to simplify the intrinsic definitions in X86InstrXOP.td to a single value type). These needed changing to integer types (matching the clang builtin and the AMD intrinsics definitions), an auto upgrade path is added to convert old calls. Mask decoding/target shuffle support will be added in future patches. Differential Revision: http://reviews.llvm.org/D20049 llvm-svn: 271633
This commit is contained in:
parent
1981be1e4f
commit
0c614eb08a
@ -3873,23 +3873,23 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
|
||||
def int_x86_xop_vpermil2pd : GCCBuiltin<"__builtin_ia32_vpermil2pd">,
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
|
||||
llvm_v2f64_ty, llvm_i8_ty],
|
||||
llvm_v2i64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_xop_vpermil2pd_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vpermil2pd256">,
|
||||
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
|
||||
llvm_v4f64_ty, llvm_i8_ty],
|
||||
llvm_v4i64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_xop_vpermil2ps : GCCBuiltin<"__builtin_ia32_vpermil2ps">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
|
||||
llvm_v4f32_ty, llvm_i8_ty],
|
||||
llvm_v4i32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_xop_vpermil2ps_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vpermil2ps256">,
|
||||
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
|
||||
llvm_v8f32_ty, llvm_i8_ty],
|
||||
llvm_v8i32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_xop_vfrcz_pd : GCCBuiltin<"__builtin_ia32_vfrczpd">,
|
||||
|
@ -282,6 +282,27 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
||||
NewFn = F;
|
||||
return true;
|
||||
}
|
||||
// Upgrade any XOP PERMIL2 index operand still using a float/double vector.
|
||||
if (Name.startswith("x86.xop.vpermil2")) {
|
||||
auto Params = F->getFunctionType()->params();
|
||||
auto Idx = Params[2];
|
||||
if (Idx->getScalarType()->isFloatingPointTy()) {
|
||||
F->setName(Name + ".old");
|
||||
unsigned IdxSize = Idx->getPrimitiveSizeInBits();
|
||||
unsigned EltSize = Idx->getScalarSizeInBits();
|
||||
Intrinsic::ID Permil2ID;
|
||||
if (EltSize == 64 && IdxSize == 128)
|
||||
Permil2ID = Intrinsic::x86_xop_vpermil2pd;
|
||||
else if (EltSize == 32 && IdxSize == 128)
|
||||
Permil2ID = Intrinsic::x86_xop_vpermil2ps;
|
||||
else if (EltSize == 64 && IdxSize == 256)
|
||||
Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
|
||||
else
|
||||
Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
|
||||
NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -911,6 +932,20 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||
CI->eraseFromParent();
|
||||
return;
|
||||
|
||||
case Intrinsic::x86_xop_vpermil2pd:
|
||||
case Intrinsic::x86_xop_vpermil2ps:
|
||||
case Intrinsic::x86_xop_vpermil2pd_256:
|
||||
case Intrinsic::x86_xop_vpermil2ps_256: {
|
||||
SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
|
||||
CI->arg_operands().end());
|
||||
VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
|
||||
VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
|
||||
Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
|
||||
CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args, Name));
|
||||
CI->eraseFromParent();
|
||||
return;
|
||||
}
|
||||
|
||||
case Intrinsic::x86_sse41_ptestc:
|
||||
case Intrinsic::x86_sse41_ptestz:
|
||||
case Intrinsic::x86_sse41_ptestnzc: {
|
||||
|
@ -21947,6 +21947,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
case X86ISD::VPSHL: return "X86ISD::VPSHL";
|
||||
case X86ISD::VPCOM: return "X86ISD::VPCOM";
|
||||
case X86ISD::VPCOMU: return "X86ISD::VPCOMU";
|
||||
case X86ISD::VPERMIL2: return "X86ISD::VPERMIL2";
|
||||
case X86ISD::FMADD: return "X86ISD::FMADD";
|
||||
case X86ISD::FMSUB: return "X86ISD::FMSUB";
|
||||
case X86ISD::FNMADD: return "X86ISD::FNMADD";
|
||||
|
@ -451,6 +451,8 @@ namespace llvm {
|
||||
VPCOM, VPCOMU,
|
||||
// XOP packed permute bytes.
|
||||
VPPERM,
|
||||
// XOP two source permutation.
|
||||
VPERMIL2,
|
||||
|
||||
// Vector multiply packed unsigned doubleword integers.
|
||||
PMULUDQ,
|
||||
|
@ -245,7 +245,12 @@ def X86vpcomu : SDNode<"X86ISD::VPCOMU",
|
||||
SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
|
||||
SDTCisSameAs<0,2>,
|
||||
SDTCisVT<3, i8>]>>;
|
||||
|
||||
def X86vpermil2 : SDNode<"X86ISD::VPERMIL2",
|
||||
SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>,
|
||||
SDTCisSameAs<0,2>,
|
||||
SDTCisSameSizeAs<0,3>,
|
||||
SDTCisSameNumEltsAs<0, 3>,
|
||||
SDTCisVT<4, i8>]>>;
|
||||
def X86vpperm : SDNode<"X86ISD::VPPERM",
|
||||
SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
|
||||
SDTCisSameAs<0,2>]>>;
|
||||
|
@ -342,27 +342,34 @@ let Predicates = [HasXOP] in {
|
||||
(VPCMOVrrrY VR256:$src1, VR256:$src2, VR256:$src3)>;
|
||||
}
|
||||
|
||||
multiclass xop5op<bits<8> opc, string OpcodeStr, Intrinsic Int128,
|
||||
Intrinsic Int256, PatFrag ld_128, PatFrag ld_256> {
|
||||
multiclass xop5op<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
ValueType vt128, ValueType vt256,
|
||||
ValueType id128, ValueType id256,
|
||||
PatFrag ld_128, PatFrag ld_256> {
|
||||
def rr : IXOP5<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2, VR128:$src3, u8imm:$src4),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
|
||||
[(set VR128:$dst,
|
||||
(Int128 VR128:$src1, VR128:$src2, VR128:$src3, imm:$src4))]>;
|
||||
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
|
||||
(id128 VR128:$src3), (i8 imm:$src4))))]>;
|
||||
def rm : IXOP5<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2, f128mem:$src3, u8imm:$src4),
|
||||
(ins VR128:$src1, VR128:$src2, i128mem:$src3, u8imm:$src4),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
|
||||
[(set VR128:$dst,
|
||||
(Int128 VR128:$src1, VR128:$src2, (ld_128 addr:$src3), imm:$src4))]>,
|
||||
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
|
||||
(id128 (bitconvert (loadv2i64 addr:$src3))),
|
||||
(i8 imm:$src4))))]>,
|
||||
VEX_W, MemOp4;
|
||||
def mr : IXOP5<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, f128mem:$src2, VR128:$src3, u8imm:$src4),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
|
||||
[(set VR128:$dst,
|
||||
(Int128 VR128:$src1, (ld_128 addr:$src2), VR128:$src3, imm:$src4))]>;
|
||||
(vt128 (OpNode (vt128 VR128:$src1),
|
||||
(vt128 (bitconvert (ld_128 addr:$src2))),
|
||||
(id128 VR128:$src3), (i8 imm:$src4))))]>;
|
||||
// For disassembler
|
||||
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
|
||||
def rr_REV : IXOP5<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
@ -376,21 +383,24 @@ multiclass xop5op<bits<8> opc, string OpcodeStr, Intrinsic Int128,
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
|
||||
[(set VR256:$dst,
|
||||
(Int256 VR256:$src1, VR256:$src2, VR256:$src3, imm:$src4))]>, VEX_L;
|
||||
(vt256 (OpNode (vt256 VR256:$src1), (vt256 VR256:$src2),
|
||||
(id256 VR256:$src3), (i8 imm:$src4))))]>, VEX_L;
|
||||
def rmY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),
|
||||
(ins VR256:$src1, VR256:$src2, f256mem:$src3, u8imm:$src4),
|
||||
(ins VR256:$src1, VR256:$src2, i256mem:$src3, u8imm:$src4),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
|
||||
[(set VR256:$dst,
|
||||
(Int256 VR256:$src1, VR256:$src2, (ld_256 addr:$src3), imm:$src4))]>,
|
||||
VEX_W, MemOp4, VEX_L;
|
||||
(vt256 (OpNode (vt256 VR256:$src1), (vt256 VR256:$src2),
|
||||
(id256 (bitconvert (loadv4i64 addr:$src3))),
|
||||
(i8 imm:$src4))))]>, VEX_W, MemOp4, VEX_L;
|
||||
def mrY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),
|
||||
(ins VR256:$src1, f256mem:$src2, VR256:$src3, u8imm:$src4),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
|
||||
[(set VR256:$dst,
|
||||
(Int256 VR256:$src1, (ld_256 addr:$src2), VR256:$src3, imm:$src4))]>,
|
||||
VEX_L;
|
||||
(vt256 (OpNode (vt256 VR256:$src1),
|
||||
(vt256 (bitconvert (ld_256 addr:$src2))),
|
||||
(id256 VR256:$src3), (i8 imm:$src4))))]>, VEX_L;
|
||||
// For disassembler
|
||||
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
|
||||
def rrY_REV : IXOP5<opc, MRMSrcReg, (outs VR256:$dst),
|
||||
@ -401,10 +411,10 @@ multiclass xop5op<bits<8> opc, string OpcodeStr, Intrinsic Int128,
|
||||
}
|
||||
|
||||
let ExeDomain = SSEPackedDouble in
|
||||
defm VPERMIL2PD : xop5op<0x49, "vpermil2pd", int_x86_xop_vpermil2pd,
|
||||
int_x86_xop_vpermil2pd_256, loadv2f64, loadv4f64>;
|
||||
defm VPERMIL2PD : xop5op<0x49, "vpermil2pd", X86vpermil2, v2f64, v4f64,
|
||||
v2i64, v4i64, loadv2f64, loadv4f64>;
|
||||
|
||||
let ExeDomain = SSEPackedSingle in
|
||||
defm VPERMIL2PS : xop5op<0x48, "vpermil2ps", int_x86_xop_vpermil2ps,
|
||||
int_x86_xop_vpermil2ps_256, loadv4f32, loadv8f32>;
|
||||
defm VPERMIL2PS : xop5op<0x48, "vpermil2ps", X86vpermil2, v4f32, v8f32,
|
||||
v4i32, v8i32, loadv4f32, loadv8f32>;
|
||||
|
||||
|
@ -2234,6 +2234,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(xop_vpcomuq, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
|
||||
X86_INTRINSIC_DATA(xop_vpcomuw, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
|
||||
X86_INTRINSIC_DATA(xop_vpcomw, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
|
||||
X86_INTRINSIC_DATA(xop_vpermil2pd, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
|
||||
X86_INTRINSIC_DATA(xop_vpermil2pd_256, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
|
||||
X86_INTRINSIC_DATA(xop_vpermil2ps, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
|
||||
X86_INTRINSIC_DATA(xop_vpermil2ps_256, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
|
||||
X86_INTRINSIC_DATA(xop_vpperm, INTR_TYPE_3OP, X86ISD::VPPERM, 0),
|
||||
X86_INTRINSIC_DATA(xop_vprotb, INTR_TYPE_2OP, X86ISD::VPROT, 0),
|
||||
X86_INTRINSIC_DATA(xop_vprotbi, INTR_TYPE_2OP, X86ISD::VPROTI, 0),
|
||||
|
@ -166,69 +166,69 @@ define <8 x i16> @stack_fold_vpcomw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16>, <8 x i16>, i8) nounwind readnone
|
||||
|
||||
define <2 x double> @stack_fold_vpermil2pd_rm(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
|
||||
define <2 x double> @stack_fold_vpermil2pd_rm(<2 x double> %a0, <2 x double> %a1, <2 x i64> %a2) {
|
||||
;CHECK-LABEL: stack_fold_vpermil2pd_rm
|
||||
;CHECK: vpermil2pd $0, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 0)
|
||||
%2 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> %a2, i8 0)
|
||||
ret <2 x double> %2
|
||||
}
|
||||
define <2 x double> @stack_fold_vpermil2pd_mr(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
|
||||
define <2 x double> @stack_fold_vpermil2pd_mr(<2 x double> %a0, <2 x i64> %a1, <2 x double> %a2) {
|
||||
;CHECK-LABEL: stack_fold_vpermil2pd_mr
|
||||
;CHECK: vpermil2pd $0, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a2, <2 x double> %a1, i8 0)
|
||||
%2 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a2, <2 x i64> %a1, i8 0)
|
||||
ret <2 x double> %2
|
||||
}
|
||||
declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
|
||||
declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x i64>, i8) nounwind readnone
|
||||
|
||||
define <4 x double> @stack_fold_vpermil2pd_rm_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
|
||||
define <4 x double> @stack_fold_vpermil2pd_rm_ymm(<4 x double> %a0, <4 x double> %a1, <4 x i64> %a2) {
|
||||
;CHECK-LABEL: stack_fold_vpermil2pd_rm
|
||||
;CHECK: vpermil2pd $0, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 0)
|
||||
%2 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> %a2, i8 0)
|
||||
ret <4 x double> %2
|
||||
}
|
||||
define <4 x double> @stack_fold_vpermil2pd_mr_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
|
||||
define <4 x double> @stack_fold_vpermil2pd_mr_ymm(<4 x double> %a0, <4 x i64> %a1, <4 x double> %a2) {
|
||||
;CHECK-LABEL: stack_fold_vpermil2pd_mr
|
||||
;CHECK: vpermil2pd $0, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a2, <4 x double> %a1, i8 0)
|
||||
%2 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a2, <4 x i64> %a1, i8 0)
|
||||
ret <4 x double> %2
|
||||
}
|
||||
declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
|
||||
declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x i64>, i8) nounwind readnone
|
||||
|
||||
define <4 x float> @stack_fold_vpermil2ps_rm(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
|
||||
define <4 x float> @stack_fold_vpermil2ps_rm(<4 x float> %a0, <4 x float> %a1, <4 x i32> %a2) {
|
||||
;CHECK-LABEL: stack_fold_vpermil2ps_rm
|
||||
;CHECK: vpermil2ps $0, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 0)
|
||||
%2 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> %a2, i8 0)
|
||||
ret <4 x float> %2
|
||||
}
|
||||
define <4 x float> @stack_fold_vpermil2ps_mr(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
|
||||
define <4 x float> @stack_fold_vpermil2ps_mr(<4 x float> %a0, <4 x i32> %a1, <4 x float> %a2) {
|
||||
;CHECK-LABEL: stack_fold_vpermil2ps_mr
|
||||
;CHECK: vpermil2ps $0, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a2, <4 x float> %a1, i8 0)
|
||||
%2 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a2, <4 x i32> %a1, i8 0)
|
||||
ret <4 x float> %2
|
||||
}
|
||||
declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
|
||||
declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x i32>, i8) nounwind readnone
|
||||
|
||||
define <8 x float> @stack_fold_vpermil2ps_rm_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
|
||||
define <8 x float> @stack_fold_vpermil2ps_rm_ymm(<8 x float> %a0, <8 x float> %a1, <8 x i32> %a2) {
|
||||
;CHECK-LABEL: stack_fold_vpermil2ps_rm
|
||||
;CHECK: vpermil2ps $0, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 0)
|
||||
%2 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x i32> %a2, i8 0)
|
||||
ret <8 x float> %2
|
||||
}
|
||||
define <8 x float> @stack_fold_vpermil2ps_mr_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
|
||||
define <8 x float> @stack_fold_vpermil2ps_mr_ymm(<8 x float> %a0, <8 x i32> %a1, <8 x float> %a2) {
|
||||
;CHECK-LABEL: stack_fold_vpermil2ps_mr
|
||||
;CHECK: vpermil2ps $0, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a2, <8 x float> %a1, i8 0)
|
||||
%2 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a2, <8 x i32> %a1, i8 0)
|
||||
ret <8 x float> %2
|
||||
}
|
||||
declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
|
||||
declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x i32>, i8) nounwind readnone
|
||||
|
||||
define <4 x i32> @stack_fold_vphaddbd(<16 x i8> %a0) {
|
||||
;CHECK-LABEL: stack_fold_vphaddbd
|
||||
|
@ -2,63 +2,60 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+xop | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+xop | FileCheck %s
|
||||
|
||||
declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
|
||||
declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
|
||||
declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x i64>, i8) nounwind readnone
|
||||
declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x i64>, i8) nounwind readnone
|
||||
|
||||
declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
|
||||
declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
|
||||
declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x i32>, i8) nounwind readnone
|
||||
declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x i32>, i8) nounwind readnone
|
||||
|
||||
declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
|
||||
|
||||
define <2 x double> @combine_vpermil2pd_identity(<2 x double> %a0, <2 x double> %a1) {
|
||||
; CHECK-LABEL: combine_vpermil2pd_identity:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
|
||||
; CHECK-NEXT: movl $2, %eax
|
||||
; CHECK-NEXT: vmovq %rax, %xmm2
|
||||
; CHECK-NEXT: vpermil2pd $0, %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; CHECK-NEXT: vpermil2pd $0, %xmm2, %xmm0, %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%mask = bitcast <2 x i64> <i64 2, i64 0> to <2 x double>
|
||||
%res0 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a1, <2 x double> %a0, <2 x double> %mask, i8 0)
|
||||
%res1 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %res0, <2 x double> undef, <2 x double> %mask, i8 0)
|
||||
%res0 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a1, <2 x double> %a0, <2 x i64> <i64 2, i64 0>, i8 0)
|
||||
%res1 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %res0, <2 x double> undef, <2 x i64> <i64 2, i64 0>, i8 0)
|
||||
ret <2 x double> %res1
|
||||
}
|
||||
|
||||
define <4 x double> @combine_vpermil2pd256_identity(<4 x double> %a0, <4 x double> %a1) {
|
||||
; CHECK-LABEL: combine_vpermil2pd256_identity:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [9.881313e-324,0.000000e+00,9.881313e-324,0.000000e+00]
|
||||
; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [2,0,2,0]
|
||||
; CHECK-NEXT: vpermil2pd $0, %ymm2, %ymm0, %ymm1, %ymm0
|
||||
; CHECK-NEXT: vpermil2pd $0, %ymm2, %ymm0, %ymm0, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%mask = bitcast <4 x i64> <i64 2, i64 0, i64 2, i64 0> to <4 x double>
|
||||
%res0 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a1, <4 x double> %a0, <4 x double> %mask, i8 0)
|
||||
%res1 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %res0, <4 x double> undef, <4 x double> %mask, i8 0)
|
||||
%res0 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a1, <4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 2, i64 0>, i8 0)
|
||||
%res1 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %res0, <4 x double> undef, <4 x i64> <i64 2, i64 0, i64 2, i64 0>, i8 0)
|
||||
ret <4 x double> %res1
|
||||
}
|
||||
|
||||
define <4 x float> @combine_vpermil2ps_identity(<4 x float> %a0, <4 x float> %a1) {
|
||||
; CHECK-LABEL: combine_vpermil2ps_identity:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovaps {{.*#+}} xmm2 = [4.203895e-45,2.802597e-45,1.401298e-45,0.000000e+00]
|
||||
; CHECK-NEXT: vmovaps {{.*#+}} xmm2 = [3,2,1,0]
|
||||
; CHECK-NEXT: vpermil2ps $0, %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; CHECK-NEXT: vpermil2ps $0, %xmm2, %xmm0, %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%mask = bitcast <4 x i32> <i32 3, i32 2, i32 1, i32 0> to <4 x float>
|
||||
%res0 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a1, <4 x float> %a0, <4 x float> %mask, i8 0)
|
||||
%res1 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %res0, <4 x float> undef, <4 x float> %mask, i8 0)
|
||||
%res0 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a1, <4 x float> %a0, <4 x i32> <i32 3, i32 2, i32 1, i32 0>, i8 0)
|
||||
%res1 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %res0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>, i8 0)
|
||||
ret <4 x float> %res1
|
||||
}
|
||||
|
||||
define <8 x float> @combine_vpermil2ps256_identity(<8 x float> %a0, <8 x float> %a1) {
|
||||
; CHECK-LABEL: combine_vpermil2ps256_identity:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [4.203895e-45,2.802597e-45,1.401298e-45,0.000000e+00,1.401298e-45,0.000000e+00,4.203895e-45,2.802597e-45]
|
||||
; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [3,2,1,0,1,0,3,2]
|
||||
; CHECK-NEXT: vpermil2ps $0, %ymm2, %ymm0, %ymm1, %ymm0
|
||||
; CHECK-NEXT: vpermil2ps $0, %ymm2, %ymm0, %ymm0, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%mask = bitcast <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 1, i32 0, i32 3, i32 2> to <8 x float>
|
||||
%res0 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a1, <8 x float> %a0, <8 x float> %mask, i8 0)
|
||||
%res1 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %res0, <8 x float> undef, <8 x float> %mask, i8 0)
|
||||
%res0 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a1, <8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 1, i32 0, i32 3, i32 2>, i8 0)
|
||||
%res1 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %res0, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 1, i32 0, i32 3, i32 2>, i8 0)
|
||||
ret <8 x float> %res1
|
||||
}
|
||||
|
||||
@ -67,8 +64,7 @@ define <4 x float> @combine_vpermil2ps_blend_with_zero(<4 x float> %a0, <4 x flo
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vpermil2ps $2, {{.*}}(%rip), %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%mask = bitcast <4 x i32> <i32 8, i32 1, i32 2, i32 3> to <4 x float>
|
||||
%res0 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %mask, i8 2)
|
||||
%res0 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 8, i32 1, i32 2, i32 3>, i8 2)
|
||||
ret <4 x float> %res0
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,82 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+xop | FileCheck %s
|
||||
|
||||
define <2 x double> @test_int_x86_xop_vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
|
||||
; CHECK-LABEL: test_int_x86_xop_vpermil2pd:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vpermil2pd $1, %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 1) ; [#uses=1]
|
||||
ret <2 x double> %res
|
||||
}
|
||||
define <2 x double> @test_int_x86_xop_vpermil2pd_mr(<2 x double> %a0, <2 x double>* %a1, <2 x double> %a2) {
|
||||
; CHECK-LABEL: test_int_x86_xop_vpermil2pd_mr:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vpermil2pd $1, %xmm1, (%rdi), %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%vec = load <2 x double>, <2 x double>* %a1
|
||||
%res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %vec, <2 x double> %a2, i8 1) ; [#uses=1]
|
||||
ret <2 x double> %res
|
||||
}
|
||||
define <2 x double> @test_int_x86_xop_vpermil2pd_rm(<2 x double> %a0, <2 x double> %a1, <2 x double>* %a2) {
|
||||
; CHECK-LABEL: test_int_x86_xop_vpermil2pd_rm:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vpermil2pd $1, (%rdi), %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%vec = load <2 x double>, <2 x double>* %a2
|
||||
%res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %vec, i8 1) ; [#uses=1]
|
||||
ret <2 x double> %res
|
||||
}
|
||||
declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
|
||||
|
||||
define <4 x double> @test_int_x86_xop_vpermil2pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
|
||||
; CHECK-LABEL: test_int_x86_xop_vpermil2pd_256:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vpermil2pd $2, %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 2) ;
|
||||
ret <4 x double> %res
|
||||
}
|
||||
define <4 x double> @test_int_x86_xop_vpermil2pd_256_mr(<4 x double> %a0, <4 x double>* %a1, <4 x double> %a2) {
|
||||
; CHECK-LABEL: test_int_x86_xop_vpermil2pd_256_mr:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vpermil2pd $2, %ymm1, (%rdi), %ymm0, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%vec = load <4 x double>, <4 x double>* %a1
|
||||
%res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %vec, <4 x double> %a2, i8 2) ;
|
||||
ret <4 x double> %res
|
||||
}
|
||||
define <4 x double> @test_int_x86_xop_vpermil2pd_256_rm(<4 x double> %a0, <4 x double> %a1, <4 x double>* %a2) {
|
||||
; CHECK-LABEL: test_int_x86_xop_vpermil2pd_256_rm:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vpermil2pd $2, (%rdi), %ymm1, %ymm0, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%vec = load <4 x double>, <4 x double>* %a2
|
||||
%res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %vec, i8 2) ;
|
||||
ret <4 x double> %res
|
||||
}
|
||||
declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
|
||||
|
||||
define <4 x float> @test_int_x86_xop_vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
|
||||
; CHECK-LABEL: test_int_x86_xop_vpermil2ps:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vpermil2ps $3, %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 3) ;
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
|
||||
|
||||
define <8 x float> @test_int_x86_xop_vpermil2ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
|
||||
; CHECK-LABEL: test_int_x86_xop_vpermil2ps_256:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vpermil2ps $4, %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 4) ;
|
||||
ret <8 x float> %res
|
||||
}
|
||||
declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
|
||||
|
||||
define <16 x i8> @test_int_x86_xop_vpcomeqb(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
; CHECK-LABEL: test_int_x86_xop_vpcomeqb:
|
||||
; CHECK: # BB#0:
|
||||
|
@ -1,81 +1,81 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+xop | FileCheck %s
|
||||
|
||||
define <2 x double> @test_int_x86_xop_vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
|
||||
define <2 x double> @test_int_x86_xop_vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> %a2) {
|
||||
; CHECK-LABEL: test_int_x86_xop_vpermil2pd:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vpermil2pd $1, %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 1) ; [#uses=1]
|
||||
%res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> %a2, i8 1) ; [#uses=1]
|
||||
ret <2 x double> %res
|
||||
}
|
||||
define <2 x double> @test_int_x86_xop_vpermil2pd_mr(<2 x double> %a0, <2 x double>* %a1, <2 x double> %a2) {
|
||||
define <2 x double> @test_int_x86_xop_vpermil2pd_mr(<2 x double> %a0, <2 x double>* %a1, <2 x i64> %a2) {
|
||||
; CHECK-LABEL: test_int_x86_xop_vpermil2pd_mr:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vpermil2pd $1, %xmm1, (%rdi), %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%vec = load <2 x double>, <2 x double>* %a1
|
||||
%res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %vec, <2 x double> %a2, i8 1) ; [#uses=1]
|
||||
%res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %vec, <2 x i64> %a2, i8 1) ; [#uses=1]
|
||||
ret <2 x double> %res
|
||||
}
|
||||
define <2 x double> @test_int_x86_xop_vpermil2pd_rm(<2 x double> %a0, <2 x double> %a1, <2 x double>* %a2) {
|
||||
define <2 x double> @test_int_x86_xop_vpermil2pd_rm(<2 x double> %a0, <2 x double> %a1, <2 x i64>* %a2) {
|
||||
; CHECK-LABEL: test_int_x86_xop_vpermil2pd_rm:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vpermil2pd $1, (%rdi), %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%vec = load <2 x double>, <2 x double>* %a2
|
||||
%res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %vec, i8 1) ; [#uses=1]
|
||||
%vec = load <2 x i64>, <2 x i64>* %a2
|
||||
%res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> %vec, i8 1) ; [#uses=1]
|
||||
ret <2 x double> %res
|
||||
}
|
||||
declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
|
||||
declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x i64>, i8) nounwind readnone
|
||||
|
||||
define <4 x double> @test_int_x86_xop_vpermil2pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
|
||||
define <4 x double> @test_int_x86_xop_vpermil2pd_256(<4 x double> %a0, <4 x double> %a1, <4 x i64> %a2) {
|
||||
; CHECK-LABEL: test_int_x86_xop_vpermil2pd_256:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vpermil2pd $2, %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 2) ;
|
||||
%res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> %a2, i8 2) ;
|
||||
ret <4 x double> %res
|
||||
}
|
||||
define <4 x double> @test_int_x86_xop_vpermil2pd_256_mr(<4 x double> %a0, <4 x double>* %a1, <4 x double> %a2) {
|
||||
define <4 x double> @test_int_x86_xop_vpermil2pd_256_mr(<4 x double> %a0, <4 x double>* %a1, <4 x i64> %a2) {
|
||||
; CHECK-LABEL: test_int_x86_xop_vpermil2pd_256_mr:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vpermil2pd $2, %ymm1, (%rdi), %ymm0, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%vec = load <4 x double>, <4 x double>* %a1
|
||||
%res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %vec, <4 x double> %a2, i8 2) ;
|
||||
%res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %vec, <4 x i64> %a2, i8 2) ;
|
||||
ret <4 x double> %res
|
||||
}
|
||||
define <4 x double> @test_int_x86_xop_vpermil2pd_256_rm(<4 x double> %a0, <4 x double> %a1, <4 x double>* %a2) {
|
||||
define <4 x double> @test_int_x86_xop_vpermil2pd_256_rm(<4 x double> %a0, <4 x double> %a1, <4 x i64>* %a2) {
|
||||
; CHECK-LABEL: test_int_x86_xop_vpermil2pd_256_rm:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vpermil2pd $2, (%rdi), %ymm1, %ymm0, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%vec = load <4 x double>, <4 x double>* %a2
|
||||
%res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %vec, i8 2) ;
|
||||
%vec = load <4 x i64>, <4 x i64>* %a2
|
||||
%res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> %vec, i8 2) ;
|
||||
ret <4 x double> %res
|
||||
}
|
||||
declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
|
||||
declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x i64>, i8) nounwind readnone
|
||||
|
||||
define <4 x float> @test_int_x86_xop_vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
|
||||
define <4 x float> @test_int_x86_xop_vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> %a2) {
|
||||
; CHECK-LABEL: test_int_x86_xop_vpermil2ps:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vpermil2ps $3, %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 3) ;
|
||||
%res = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> %a2, i8 3) ;
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
|
||||
declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x i32>, i8) nounwind readnone
|
||||
|
||||
define <8 x float> @test_int_x86_xop_vpermil2ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
|
||||
define <8 x float> @test_int_x86_xop_vpermil2ps_256(<8 x float> %a0, <8 x float> %a1, <8 x i32> %a2) {
|
||||
; CHECK-LABEL: test_int_x86_xop_vpermil2ps_256:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vpermil2ps $4, %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 4) ;
|
||||
%res = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x i32> %a2, i8 4) ;
|
||||
ret <8 x float> %res
|
||||
}
|
||||
declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
|
||||
declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x i32>, i8) nounwind readnone
|
||||
|
||||
define <2 x i64> @test_int_x86_xop_vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
|
||||
; CHECK-LABEL: test_int_x86_xop_vpcmov:
|
||||
|
Loading…
x
Reference in New Issue
Block a user