1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 19:12:56 +02:00

[X86] Add intrinsics for KTEST instructions.

These intrinsics use the same implementation as PTEST intrinsics, but use vXi1 vectors.

New clang builtins will be accompanying them shortly.

llvm-svn: 341259
This commit is contained in:
Craig Topper 2018-08-31 21:31:53 +00:00
parent 77c57aa86c
commit 9c66dc8201
4 changed files with 181 additions and 5 deletions

View File

@ -2773,6 +2773,24 @@ let TargetPrefix = "x86" in {
Intrinsic<[llvm_v32i1_ty], [llvm_v32i1_ty, llvm_v32i1_ty], [IntrNoMem]>; Intrinsic<[llvm_v32i1_ty], [llvm_v32i1_ty, llvm_v32i1_ty], [IntrNoMem]>;
def int_x86_avx512_kadd_q : def int_x86_avx512_kadd_q :
Intrinsic<[llvm_v64i1_ty], [llvm_v64i1_ty, llvm_v64i1_ty], [IntrNoMem]>; Intrinsic<[llvm_v64i1_ty], [llvm_v64i1_ty, llvm_v64i1_ty], [IntrNoMem]>;
// Mask-register test intrinsics (ktestc / ktestz).
// Each takes two vXi1 mask vectors of identical type and returns an i32.
// The _b/_w/_d/_q suffixes select the v8i1/v16i1/v32i1/v64i1 operand types.
// All are declared IntrNoMem.
def int_x86_avx512_ktestc_b :
Intrinsic<[llvm_i32_ty], [llvm_v8i1_ty, llvm_v8i1_ty], [IntrNoMem]>;
def int_x86_avx512_ktestc_w :
Intrinsic<[llvm_i32_ty], [llvm_v16i1_ty, llvm_v16i1_ty], [IntrNoMem]>;
def int_x86_avx512_ktestc_d :
Intrinsic<[llvm_i32_ty], [llvm_v32i1_ty, llvm_v32i1_ty], [IntrNoMem]>;
def int_x86_avx512_ktestc_q :
Intrinsic<[llvm_i32_ty], [llvm_v64i1_ty, llvm_v64i1_ty], [IntrNoMem]>;
def int_x86_avx512_ktestz_b :
Intrinsic<[llvm_i32_ty], [llvm_v8i1_ty, llvm_v8i1_ty], [IntrNoMem]>;
def int_x86_avx512_ktestz_w :
Intrinsic<[llvm_i32_ty], [llvm_v16i1_ty, llvm_v16i1_ty], [IntrNoMem]>;
def int_x86_avx512_ktestz_d :
Intrinsic<[llvm_i32_ty], [llvm_v32i1_ty, llvm_v32i1_ty], [IntrNoMem]>;
def int_x86_avx512_ktestz_q :
Intrinsic<[llvm_i32_ty], [llvm_v64i1_ty, llvm_v64i1_ty], [IntrNoMem]>;
} }
// Conversion ops // Conversion ops

View File

@ -21297,6 +21297,14 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// ptest and testp intrinsics. The intrinsic these come from are designed to // ptest and testp intrinsics. The intrinsic these come from are designed to
// return an integer value, not just an instruction so lower it to the ptest // return an integer value, not just an instruction so lower it to the ptest
// or testp pattern and a setcc for the result. // or testp pattern and a setcc for the result.
case Intrinsic::x86_avx512_ktestc_b:
case Intrinsic::x86_avx512_ktestc_w:
case Intrinsic::x86_avx512_ktestc_d:
case Intrinsic::x86_avx512_ktestc_q:
case Intrinsic::x86_avx512_ktestz_b:
case Intrinsic::x86_avx512_ktestz_w:
case Intrinsic::x86_avx512_ktestz_d:
case Intrinsic::x86_avx512_ktestz_q:
case Intrinsic::x86_sse41_ptestz: case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_sse41_ptestc: case Intrinsic::x86_sse41_ptestc:
case Intrinsic::x86_sse41_ptestnzc: case Intrinsic::x86_sse41_ptestnzc:
@ -21315,15 +21323,30 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::x86_avx_vtestz_pd_256: case Intrinsic::x86_avx_vtestz_pd_256:
case Intrinsic::x86_avx_vtestc_pd_256: case Intrinsic::x86_avx_vtestc_pd_256:
case Intrinsic::x86_avx_vtestnzc_pd_256: { case Intrinsic::x86_avx_vtestnzc_pd_256: {
bool IsTestPacked = false; unsigned TestOpc = X86ISD::PTEST;
X86::CondCode X86CC; X86::CondCode X86CC;
switch (IntNo) { switch (IntNo) {
default: llvm_unreachable("Bad fallthrough in Intrinsic lowering."); default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
// ktestc: all four mask widths share one lowering; the node's operand type
// carries the width, so only the test opcode and condition code are set here.
case Intrinsic::x86_avx512_ktestc_b:
case Intrinsic::x86_avx512_ktestc_w:
case Intrinsic::x86_avx512_ktestc_d:
case Intrinsic::x86_avx512_ktestc_q:
// CF = 1
TestOpc = X86ISD::KTEST;
X86CC = X86::COND_B;
break;
// ktestz: same KTEST node as ktestc, but the result is read with COND_E.
case Intrinsic::x86_avx512_ktestz_b:
case Intrinsic::x86_avx512_ktestz_w:
case Intrinsic::x86_avx512_ktestz_d:
case Intrinsic::x86_avx512_ktestz_q:
// ZF = 1
TestOpc = X86ISD::KTEST;
X86CC = X86::COND_E;
break;
case Intrinsic::x86_avx_vtestz_ps: case Intrinsic::x86_avx_vtestz_ps:
case Intrinsic::x86_avx_vtestz_pd: case Intrinsic::x86_avx_vtestz_pd:
case Intrinsic::x86_avx_vtestz_ps_256: case Intrinsic::x86_avx_vtestz_ps_256:
case Intrinsic::x86_avx_vtestz_pd_256: case Intrinsic::x86_avx_vtestz_pd_256:
IsTestPacked = true; TestOpc = X86ISD::TESTP;
LLVM_FALLTHROUGH; LLVM_FALLTHROUGH;
case Intrinsic::x86_sse41_ptestz: case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_avx_ptestz_256: case Intrinsic::x86_avx_ptestz_256:
@ -21334,7 +21357,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::x86_avx_vtestc_pd: case Intrinsic::x86_avx_vtestc_pd:
case Intrinsic::x86_avx_vtestc_ps_256: case Intrinsic::x86_avx_vtestc_ps_256:
case Intrinsic::x86_avx_vtestc_pd_256: case Intrinsic::x86_avx_vtestc_pd_256:
IsTestPacked = true; TestOpc = X86ISD::TESTP;
LLVM_FALLTHROUGH; LLVM_FALLTHROUGH;
case Intrinsic::x86_sse41_ptestc: case Intrinsic::x86_sse41_ptestc:
case Intrinsic::x86_avx_ptestc_256: case Intrinsic::x86_avx_ptestc_256:
@ -21345,7 +21368,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::x86_avx_vtestnzc_pd: case Intrinsic::x86_avx_vtestnzc_pd:
case Intrinsic::x86_avx_vtestnzc_ps_256: case Intrinsic::x86_avx_vtestnzc_ps_256:
case Intrinsic::x86_avx_vtestnzc_pd_256: case Intrinsic::x86_avx_vtestnzc_pd_256:
IsTestPacked = true; TestOpc = X86ISD::TESTP;
LLVM_FALLTHROUGH; LLVM_FALLTHROUGH;
case Intrinsic::x86_sse41_ptestnzc: case Intrinsic::x86_sse41_ptestnzc:
case Intrinsic::x86_avx_ptestnzc_256: case Intrinsic::x86_avx_ptestnzc_256:
@ -21356,7 +21379,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue LHS = Op.getOperand(1); SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2); SDValue RHS = Op.getOperand(2);
unsigned TestOpc = IsTestPacked ? X86ISD::TESTP : X86ISD::PTEST;
SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS); SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS);
SDValue SetCC = getSETCC(X86CC, Test, dl, DAG); SDValue SetCC = getSETCC(X86CC, Test, dl, DAG);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);

View File

@ -58,6 +58,74 @@ entry:
} }
declare <64 x i1> @llvm.x86.avx512.kadd.q(<64 x i1>, <64 x i1>) declare <64 x i1> @llvm.x86.avx512.kadd.q(<64 x i1>, <64 x i1>)
; Lowering test for llvm.x86.avx512.ktestc.d. Both <32 x i1> mask operands are
; built with icmp ne against zero on <32 x i16> inputs (expected as vptestmw),
; and the i32 result is expected to be produced by ktestd followed by setb.
define i32 @test_x86_avx512_ktestc_d(<32 x i16> %A, <32 x i16> %B) {
; CHECK-LABEL: test_x86_avx512_ktestc_d:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestmw %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc0]
; CHECK-NEXT: vptestmw %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x26,0xc9]
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: ktestd %k1, %k0 # encoding: [0xc4,0xe1,0xf9,0x99,0xc1]
; CHECK-NEXT: setb %al # encoding: [0x0f,0x92,0xc0]
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%1 = icmp ne <32 x i16> %A, zeroinitializer
%2 = icmp ne <32 x i16> %B, zeroinitializer
%res = call i32 @llvm.x86.avx512.ktestc.d(<32 x i1> %1, <32 x i1> %2) ; <i32> [#uses=1]
ret i32 %res
}
declare i32 @llvm.x86.avx512.ktestc.d(<32 x i1>, <32 x i1>) nounwind readnone
; Lowering test for llvm.x86.avx512.ktestz.d. Both <32 x i1> mask operands are
; built with icmp ne against zero on <32 x i16> inputs (expected as vptestmw),
; and the i32 result is expected to be produced by ktestd followed by sete.
define i32 @test_x86_avx512_ktestz_d(<32 x i16> %A, <32 x i16> %B) {
; CHECK-LABEL: test_x86_avx512_ktestz_d:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestmw %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc0]
; CHECK-NEXT: vptestmw %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x26,0xc9]
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: ktestd %k1, %k0 # encoding: [0xc4,0xe1,0xf9,0x99,0xc1]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%1 = icmp ne <32 x i16> %A, zeroinitializer
%2 = icmp ne <32 x i16> %B, zeroinitializer
%res = call i32 @llvm.x86.avx512.ktestz.d(<32 x i1> %1, <32 x i1> %2) ; <i32> [#uses=1]
ret i32 %res
}
declare i32 @llvm.x86.avx512.ktestz.d(<32 x i1>, <32 x i1>) nounwind readnone
; Lowering test for llvm.x86.avx512.ktestc.q. Both <64 x i1> mask operands are
; built with icmp ne against zero on <64 x i8> inputs (expected as vptestmb),
; and the i32 result is expected to be produced by ktestq followed by setb.
define i32 @test_x86_avx512_ktestc_q(<64 x i8> %A, <64 x i8> %B) {
; CHECK-LABEL: test_x86_avx512_ktestc_q:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestmb %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc0]
; CHECK-NEXT: vptestmb %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x26,0xc9]
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: ktestq %k1, %k0 # encoding: [0xc4,0xe1,0xf8,0x99,0xc1]
; CHECK-NEXT: setb %al # encoding: [0x0f,0x92,0xc0]
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%1 = icmp ne <64 x i8> %A, zeroinitializer
%2 = icmp ne <64 x i8> %B, zeroinitializer
%res = call i32 @llvm.x86.avx512.ktestc.q(<64 x i1> %1, <64 x i1> %2) ; <i32> [#uses=1]
ret i32 %res
}
declare i32 @llvm.x86.avx512.ktestc.q(<64 x i1>, <64 x i1>) nounwind readnone
; Lowering test for llvm.x86.avx512.ktestz.q. Both <64 x i1> mask operands are
; built with icmp ne against zero on <64 x i8> inputs (expected as vptestmb),
; and the i32 result is expected to be produced by ktestq followed by sete.
define i32 @test_x86_avx512_ktestz_q(<64 x i8> %A, <64 x i8> %B) {
; CHECK-LABEL: test_x86_avx512_ktestz_q:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestmb %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc0]
; CHECK-NEXT: vptestmb %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x26,0xc9]
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: ktestq %k1, %k0 # encoding: [0xc4,0xe1,0xf8,0x99,0xc1]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%1 = icmp ne <64 x i8> %A, zeroinitializer
%2 = icmp ne <64 x i8> %B, zeroinitializer
%res = call i32 @llvm.x86.avx512.ktestz.q(<64 x i1> %1, <64 x i1> %2) ; <i32> [#uses=1]
ret i32 %res
}
declare i32 @llvm.x86.avx512.ktestz.q(<64 x i1>, <64 x i1>) nounwind readnone
define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) { define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_mask_packs_epi32_rr_512: ; CHECK-LABEL: test_mask_packs_epi32_rr_512:
; CHECK: # %bb.0: ; CHECK: # %bb.0:

View File

@ -48,6 +48,74 @@ entry:
} }
declare <8 x i1> @llvm.x86.avx512.kadd.b(<8 x i1>, <8 x i1>) declare <8 x i1> @llvm.x86.avx512.kadd.b(<8 x i1>, <8 x i1>)
; Lowering test for llvm.x86.avx512.ktestc.w. Both <16 x i1> mask operands are
; built with icmp ne against zero on <16 x i32> inputs (expected as vptestmd),
; and the i32 result is expected to be produced by ktestw followed by setb.
define i32 @test_x86_avx512_ktestc_w(<16 x i32> %A, <16 x i32> %B) {
; CHECK-LABEL: test_x86_avx512_ktestc_w:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestmd %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc0]
; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x27,0xc9]
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: ktestw %k1, %k0 # encoding: [0xc5,0xf8,0x99,0xc1]
; CHECK-NEXT: setb %al # encoding: [0x0f,0x92,0xc0]
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%1 = icmp ne <16 x i32> %A, zeroinitializer
%2 = icmp ne <16 x i32> %B, zeroinitializer
%res = call i32 @llvm.x86.avx512.ktestc.w(<16 x i1> %1, <16 x i1> %2) ; <i32> [#uses=1]
ret i32 %res
}
declare i32 @llvm.x86.avx512.ktestc.w(<16 x i1>, <16 x i1>) nounwind readnone
; Lowering test for llvm.x86.avx512.ktestz.w. Both <16 x i1> mask operands are
; built with icmp ne against zero on <16 x i32> inputs (expected as vptestmd),
; and the i32 result is expected to be produced by ktestw followed by sete.
define i32 @test_x86_avx512_ktestz_w(<16 x i32> %A, <16 x i32> %B) {
; CHECK-LABEL: test_x86_avx512_ktestz_w:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestmd %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc0]
; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x27,0xc9]
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: ktestw %k1, %k0 # encoding: [0xc5,0xf8,0x99,0xc1]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%1 = icmp ne <16 x i32> %A, zeroinitializer
%2 = icmp ne <16 x i32> %B, zeroinitializer
%res = call i32 @llvm.x86.avx512.ktestz.w(<16 x i1> %1, <16 x i1> %2) ; <i32> [#uses=1]
ret i32 %res
}
declare i32 @llvm.x86.avx512.ktestz.w(<16 x i1>, <16 x i1>) nounwind readnone
; Lowering test for llvm.x86.avx512.ktestc.b. Both <8 x i1> mask operands are
; built with icmp ne against zero on <8 x i64> inputs (expected as vptestmq),
; and the i32 result is expected to be produced by ktestb followed by setb.
define i32 @test_x86_avx512_ktestc_b(<8 x i64> %A, <8 x i64> %B) {
; CHECK-LABEL: test_x86_avx512_ktestc_b:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestmq %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc0]
; CHECK-NEXT: vptestmq %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x27,0xc9]
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: ktestb %k1, %k0 # encoding: [0xc5,0xf9,0x99,0xc1]
; CHECK-NEXT: setb %al # encoding: [0x0f,0x92,0xc0]
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%1 = icmp ne <8 x i64> %A, zeroinitializer
%2 = icmp ne <8 x i64> %B, zeroinitializer
%res = call i32 @llvm.x86.avx512.ktestc.b(<8 x i1> %1, <8 x i1> %2) ; <i32> [#uses=1]
ret i32 %res
}
declare i32 @llvm.x86.avx512.ktestc.b(<8 x i1>, <8 x i1>) nounwind readnone
; Lowering test for llvm.x86.avx512.ktestz.b. Both <8 x i1> mask operands are
; built with icmp ne against zero on <8 x i64> inputs (expected as vptestmq),
; and the i32 result is expected to be produced by ktestb followed by sete.
define i32 @test_x86_avx512_ktestz_b(<8 x i64> %A, <8 x i64> %B) {
; CHECK-LABEL: test_x86_avx512_ktestz_b:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestmq %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc0]
; CHECK-NEXT: vptestmq %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x27,0xc9]
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: ktestb %k1, %k0 # encoding: [0xc5,0xf9,0x99,0xc1]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%1 = icmp ne <8 x i64> %A, zeroinitializer
%2 = icmp ne <8 x i64> %B, zeroinitializer
%res = call i32 @llvm.x86.avx512.ktestz.b(<8 x i1> %1, <8 x i1> %2) ; <i32> [#uses=1]
ret i32 %res
}
declare i32 @llvm.x86.avx512.ktestz.b(<8 x i1>, <8 x i1>) nounwind readnone
declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double>, <8 x i64>, i8, i32) declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double>, <8 x i64>, i8, i32)
define <8 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) { define <8 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {