1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-18 18:42:46 +02:00

[X86] Auto upgrade VPCOM/VPCOMU intrinsics to generic integer comparisons

This causes a couple of changes in the upgrade tests as signed/unsigned eq/ne are equivalent and we constant fold true/false codes, these changes are the same as what we already do for avx512 cmp/ucmp.

Noticed while cleaning up vector integer comparison costs for PR40376.

llvm-svn: 351697
This commit is contained in:
Simon Pilgrim 2019-01-20 19:27:40 +00:00
parent 12837ca780
commit 0b80866b7e
7 changed files with 113 additions and 200 deletions

View File

@ -1908,31 +1908,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_xop_vfrcz_ps_256 : GCCBuiltin<"__builtin_ia32_vfrczps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
def int_x86_xop_vpcomb :
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_xop_vpcomw :
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_xop_vpcomd :
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_xop_vpcomq :
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_xop_vpcomub :
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_xop_vpcomuw :
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_xop_vpcomud :
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_xop_vpcomuq :
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_xop_vphaddbd :
GCCBuiltin<"__builtin_ia32_vphaddbd">,
Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;

View File

@ -361,8 +361,7 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
Name == "xop.vpcmov.256" || // Added in 5.0
Name.startswith("avx512.mask.move.s") || // Added in 4.0
Name.startswith("avx512.cvtmask2") || // Added in 5.0
(Name.startswith("xop.vpcom") && // Added in 3.2
F->arg_size() == 2) ||
Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
Name.startswith("xop.vprot") || // Added in 8.0
Name.startswith("avx512.prol") || // Added in 8.0
Name.startswith("avx512.pror") || // Added in 8.0
@ -2038,26 +2037,31 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
else
llvm_unreachable("Unknown suffix");
Name = Name.substr(9); // strip off "xop.vpcom"
unsigned Imm;
if (Name.startswith("lt"))
Imm = 0;
else if (Name.startswith("le"))
Imm = 1;
else if (Name.startswith("gt"))
Imm = 2;
else if (Name.startswith("ge"))
Imm = 3;
else if (Name.startswith("eq"))
Imm = 4;
else if (Name.startswith("ne"))
Imm = 5;
else if (Name.startswith("false"))
Imm = 6;
else if (Name.startswith("true"))
Imm = 7;
else
llvm_unreachable("Unknown condition");
if (CI->getNumArgOperands() == 3) {
Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
} else {
Name = Name.substr(9); // strip off "xop.vpcom"
if (Name.startswith("lt"))
Imm = 0;
else if (Name.startswith("le"))
Imm = 1;
else if (Name.startswith("gt"))
Imm = 2;
else if (Name.startswith("ge"))
Imm = 3;
else if (Name.startswith("eq"))
Imm = 4;
else if (Name.startswith("ne"))
Imm = 5;
else if (Name.startswith("false"))
Imm = 6;
else if (Name.startswith("true"))
Imm = 7;
else
llvm_unreachable("Unknown condition");
}
Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
} else if (IsX86 && Name.startswith("xop.vpcmov")) {
Value *Sel = CI->getArgOperand(2);

View File

@ -1205,14 +1205,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(vgf2p8mulb_512, INTR_TYPE_2OP,
X86ISD::GF2P8MULB, 0),
X86_INTRINSIC_DATA(xop_vpcomb, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
X86_INTRINSIC_DATA(xop_vpcomd, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
X86_INTRINSIC_DATA(xop_vpcomq, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
X86_INTRINSIC_DATA(xop_vpcomub, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
X86_INTRINSIC_DATA(xop_vpcomud, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
X86_INTRINSIC_DATA(xop_vpcomuq, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
X86_INTRINSIC_DATA(xop_vpcomuw, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
X86_INTRINSIC_DATA(xop_vpcomw, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
X86_INTRINSIC_DATA(xop_vpermil2pd, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
X86_INTRINSIC_DATA(xop_vpermil2pd_256, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
X86_INTRINSIC_DATA(xop_vpermil2ps, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),

View File

@ -1133,45 +1133,6 @@ static Value *simplifyX86vpermv(const IntrinsicInst &II,
return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
}
/// Decode XOP integer vector comparison intrinsics.
static Value *simplifyX86vpcom(const IntrinsicInst &II,
InstCombiner::BuilderTy &Builder,
bool IsSigned) {
if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
uint64_t Imm = CInt->getZExtValue() & 0x7;
VectorType *VecTy = cast<VectorType>(II.getType());
CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
switch (Imm) {
case 0x0:
Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
break;
case 0x1:
Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
break;
case 0x2:
Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
break;
case 0x3:
Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
break;
case 0x4:
Pred = ICmpInst::ICMP_EQ; break;
case 0x5:
Pred = ICmpInst::ICMP_NE; break;
case 0x6:
return ConstantInt::getSigned(VecTy, 0); // FALSE
case 0x7:
return ConstantInt::getSigned(VecTy, -1); // TRUE
}
if (Value *Cmp = Builder.CreateICmp(Pred, II.getArgOperand(0),
II.getArgOperand(1)))
return Builder.CreateSExtOrTrunc(Cmp, VecTy);
}
return nullptr;
}
static bool maskIsAllOneOrUndef(Value *Mask) {
auto *ConstMask = dyn_cast<Constant>(Mask);
if (!ConstMask)
@ -3167,22 +3128,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return nullptr;
break;
case Intrinsic::x86_xop_vpcomb:
case Intrinsic::x86_xop_vpcomd:
case Intrinsic::x86_xop_vpcomq:
case Intrinsic::x86_xop_vpcomw:
if (Value *V = simplifyX86vpcom(*II, Builder, true))
return replaceInstUsesWith(*II, V);
break;
case Intrinsic::x86_xop_vpcomub:
case Intrinsic::x86_xop_vpcomud:
case Intrinsic::x86_xop_vpcomuq:
case Intrinsic::x86_xop_vpcomuw:
if (Value *V = simplifyX86vpcom(*II, Builder, false))
return replaceInstUsesWith(*II, V);
break;
case Intrinsic::ppc_altivec_vperm:
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
// Note that ppc_altivec_vperm has a big-endian bias, so when creating

View File

@ -74,12 +74,12 @@ define <4 x i32> @commute_fold_vpcomud(<4 x i32>* %a0, <4 x i32> %a1) {
; X32-LABEL: commute_fold_vpcomud:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpcomequd (%eax), %xmm0, %xmm0
; X32-NEXT: vpcomeqd (%eax), %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: commute_fold_vpcomud:
; X64: # %bb.0:
; X64-NEXT: vpcomequd (%rdi), %xmm0, %xmm0
; X64-NEXT: vpcomeqd (%rdi), %xmm0, %xmm0
; X64-NEXT: retq
%1 = load <4 x i32>, <4 x i32>* %a0
%2 = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %1, <4 x i32> %a1, i8 4) ; vpcomequd
@ -91,12 +91,12 @@ define <2 x i64> @commute_fold_vpcomuq(<2 x i64>* %a0, <2 x i64> %a1) {
; X32-LABEL: commute_fold_vpcomuq:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpcomnequq (%eax), %xmm0, %xmm0
; X32-NEXT: vpcomneqq (%eax), %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: commute_fold_vpcomuq:
; X64: # %bb.0:
; X64-NEXT: vpcomnequq (%rdi), %xmm0, %xmm0
; X64-NEXT: vpcomneqq (%rdi), %xmm0, %xmm0
; X64-NEXT: retq
%1 = load <2 x i64>, <2 x i64>* %a0
%2 = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %1, <2 x i64> %a1, i8 5) ; vpcomnequq
@ -107,13 +107,12 @@ declare <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8) nounwind readn
define <8 x i16> @commute_fold_vpcomuw(<8 x i16>* %a0, <8 x i16> %a1) {
; X32-LABEL: commute_fold_vpcomuw:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpcomfalseuw (%eax), %xmm0, %xmm0
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: commute_fold_vpcomuw:
; X64: # %bb.0:
; X64-NEXT: vpcomfalseuw (%rdi), %xmm0, %xmm0
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
%1 = load <8 x i16>, <8 x i16>* %a0
%2 = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %1, <8 x i16> %a1, i8 6) ; vpcomfalseuw
@ -124,13 +123,12 @@ declare <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8) nounwind readn
define <8 x i16> @commute_fold_vpcomw(<8 x i16>* %a0, <8 x i16> %a1) {
; X32-LABEL: commute_fold_vpcomw:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpcomtruew (%eax), %xmm0, %xmm0
; X32-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: commute_fold_vpcomw:
; X64: # %bb.0:
; X64-NEXT: vpcomtruew (%rdi), %xmm0, %xmm0
; X64-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
%1 = load <8 x i16>, <8 x i16>* %a0
%2 = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %1, <8 x i16> %a1, i8 7) ; vpcomtruew

View File

@ -726,6 +726,86 @@ define <8 x i16> @test_int_x86_xop_vpcomtruew(<8 x i16> %a0, <8 x i16> %a1) {
}
declare <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16>, <8 x i16>) nounwind readnone
define <16 x i8> @test_int_x86_xop_vpcomb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpcomb:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcomltb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %a0, <16 x i8> %a1, i8 0) ;
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8>, <16 x i8>, i8) nounwind readnone
define <8 x i16> @test_int_x86_xop_vpcomw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpcomw:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcomltw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %a0, <8 x i16> %a1, i8 0) ;
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16>, <8 x i16>, i8) nounwind readnone
define <4 x i32> @test_int_x86_xop_vpcomd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpcomd:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcomltd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %a0, <4 x i32> %a1, i8 0) ;
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32>, <4 x i32>, i8) nounwind readnone
define <2 x i64> @test_int_x86_xop_vpcomq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpcomq:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcomltq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ;
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64>, <2 x i64>, i8) nounwind readnone
define <16 x i8> @test_int_x86_xop_vpcomub(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpcomub:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcomltub %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %a0, <16 x i8> %a1, i8 0) ;
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8>, <16 x i8>, i8) nounwind readnone
define <8 x i16> @test_int_x86_xop_vpcomuw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpcomuw:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcomltuw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %a0, <8 x i16> %a1, i8 0) ;
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8) nounwind readnone
define <4 x i32> @test_int_x86_xop_vpcomud(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpcomud:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcomltud %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %a0, <4 x i32> %a1, i8 0) ;
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32>, <4 x i32>, i8) nounwind readnone
define <2 x i64> @test_int_x86_xop_vpcomuq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpcomuq:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcomltuq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ;
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8) nounwind readnone
define <2 x i64> @test_int_x86_xop_vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpcmov:
; CHECK: # %bb.0:

View File

@ -663,84 +663,3 @@ define <8 x float> @test_int_x86_xop_vfrcz_ps_256_mem(<8 x float>* %a0) {
ret <8 x float> %res
}
declare <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float>) nounwind readnone
define <16 x i8> @test_int_x86_xop_vpcomb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpcomb:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcomltb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %a0, <16 x i8> %a1, i8 0) ;
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8>, <16 x i8>, i8) nounwind readnone
define <8 x i16> @test_int_x86_xop_vpcomw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpcomw:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcomltw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %a0, <8 x i16> %a1, i8 0) ;
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16>, <8 x i16>, i8) nounwind readnone
define <4 x i32> @test_int_x86_xop_vpcomd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpcomd:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcomltd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %a0, <4 x i32> %a1, i8 0) ;
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32>, <4 x i32>, i8) nounwind readnone
define <2 x i64> @test_int_x86_xop_vpcomq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpcomq:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcomltq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ;
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64>, <2 x i64>, i8) nounwind readnone
define <16 x i8> @test_int_x86_xop_vpcomub(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpcomub:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcomltub %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %a0, <16 x i8> %a1, i8 0) ;
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8>, <16 x i8>, i8) nounwind readnone
define <8 x i16> @test_int_x86_xop_vpcomuw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpcomuw:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcomltuw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %a0, <8 x i16> %a1, i8 0) ;
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8) nounwind readnone
define <4 x i32> @test_int_x86_xop_vpcomud(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpcomud:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcomltud %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %a0, <4 x i32> %a1, i8 0) ;
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32>, <4 x i32>, i8) nounwind readnone
define <2 x i64> @test_int_x86_xop_vpcomuq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpcomuq:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcomltuq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ;
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8) nounwind readnone