mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[X86] Teach combineBitcastvxi1 to prefer movmsk on avx512 in more cases
If the input to the bitcast is a sign-bit test, it makes sense to use vpmovmskb or vmovmskps/vmovmskpd directly. This removes the need to copy the sign bits to a k-register and then to a GPR.

Fixes PR46200.

Differential Revision: https://reviews.llvm.org/D81327
This commit is contained in:
parent
e6c4a0d6e9
commit
1b3f3aebe1
@ -37668,14 +37668,26 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
|
||||
// movmskb even with avx512. This will be better than truncating to vXi1 and
|
||||
// using a kmov. This can especially help KNL if the input is a v16i8/v32i8
|
||||
// vpcmpeqb/vpcmpgtb.
|
||||
bool IsTruncated = Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse() &&
|
||||
(Src.getOperand(0).getValueType() == MVT::v16i8 ||
|
||||
Src.getOperand(0).getValueType() == MVT::v32i8 ||
|
||||
Src.getOperand(0).getValueType() == MVT::v64i8);
|
||||
bool PreferMovMsk = Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse() &&
|
||||
(Src.getOperand(0).getValueType() == MVT::v16i8 ||
|
||||
Src.getOperand(0).getValueType() == MVT::v32i8 ||
|
||||
Src.getOperand(0).getValueType() == MVT::v64i8);
|
||||
|
||||
// Prefer movmsk for AVX512 for (bitcast (setlt X, 0)) which can be handled
|
||||
// directly with vpmovmskb/vmovmskps/vmovmskpd.
|
||||
if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse() &&
|
||||
cast<CondCodeSDNode>(Src.getOperand(2))->get() == ISD::SETLT &&
|
||||
ISD::isBuildVectorAllZeros(Src.getOperand(1).getNode())) {
|
||||
EVT CmpVT = Src.getOperand(0).getValueType();
|
||||
EVT EltVT = CmpVT.getVectorElementType();
|
||||
if (CmpVT.getSizeInBits() <= 256 &&
|
||||
(EltVT == MVT::i8 || EltVT == MVT::i32 || EltVT == MVT::i64))
|
||||
PreferMovMsk = true;
|
||||
}
|
||||
|
||||
// With AVX512 vxi1 types are legal and we prefer using k-regs.
|
||||
// MOVMSK is supported in SSE2 or later.
|
||||
if (!Subtarget.hasSSE2() || (Subtarget.hasAVX512() && !IsTruncated))
|
||||
if (!Subtarget.hasSSE2() || (Subtarget.hasAVX512() && !PreferMovMsk))
|
||||
return SDValue();
|
||||
|
||||
// There are MOVMSK flavors for types v16i8, v32i8, v4f32, v8f32, v4f64 and
|
||||
@ -38169,6 +38181,47 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
|
||||
return DAG.getConstant(0, SDLoc(N0), VT);
|
||||
}
|
||||
|
||||
// Look for MOVMSK that is maybe truncated and then bitcasted to vXi1.
|
||||
// Turn it into a sign bit compare that produces a k-register. This avoids
|
||||
// a trip through a GPR.
|
||||
if (Subtarget.hasAVX512() && SrcVT.isScalarInteger() &&
|
||||
VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
|
||||
isPowerOf2_32(VT.getVectorNumElements())) {
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
SDValue Src = N0;
|
||||
|
||||
// Peek through truncate.
|
||||
if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
|
||||
Src = N0.getOperand(0);
|
||||
|
||||
if (Src.getOpcode() == X86ISD::MOVMSK && Src.hasOneUse()) {
|
||||
SDValue MovmskIn = Src.getOperand(0);
|
||||
MVT MovmskVT = MovmskIn.getSimpleValueType();
|
||||
unsigned MovMskElts = MovmskVT.getVectorNumElements();
|
||||
|
||||
// We allow extra bits of the movmsk to be used since they are known zero.
|
||||
// We can't convert a VPMOVMSKB without avx512bw.
|
||||
if (MovMskElts <= NumElts &&
|
||||
(Subtarget.hasBWI() || MovmskVT.getVectorElementType() != MVT::i8)) {
|
||||
EVT IntVT = EVT(MovmskVT).changeVectorElementTypeToInteger();
|
||||
MovmskIn = DAG.getBitcast(IntVT, MovmskIn);
|
||||
SDLoc dl(N);
|
||||
MVT CmpVT = MVT::getVectorVT(MVT::i1, MovMskElts);
|
||||
SDValue Cmp = DAG.getSetCC(dl, CmpVT, MovmskIn,
|
||||
DAG.getConstant(0, dl, IntVT), ISD::SETLT);
|
||||
if (EVT(CmpVT) == VT)
|
||||
return Cmp;
|
||||
|
||||
// Pad with zeroes up to original VT to replace the zeroes that were
|
||||
// being used from the MOVMSK.
|
||||
unsigned NumConcats = NumElts / MovMskElts;
|
||||
SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, dl, CmpVT));
|
||||
Ops[0] = Cmp;
|
||||
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Ops);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Try to remove bitcasts from input and output of mask arithmetic to
|
||||
// remove GPR<->K-register crossings.
|
||||
if (SDValue V = combineCastedMaskArithmetic(N, DAG, DCI, Subtarget))
|
||||
|
@ -6334,8 +6334,7 @@ declare i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8>)
|
||||
define i16@test_int_x86_avx512_cvtb2mask_128(<16 x i8> %x0) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_128:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vpmovb2m %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x29,0xc0]
|
||||
; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
|
||||
; CHECK-NEXT: vpmovmskb %xmm0, %eax # encoding: [0xc5,0xf9,0xd7,0xc0]
|
||||
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%res = call i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8> %x0)
|
||||
@ -6347,8 +6346,7 @@ declare i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8>)
|
||||
define i32@test_int_x86_avx512_cvtb2mask_256(<32 x i8> %x0) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vpmovb2m %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x29,0xc0]
|
||||
; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
|
||||
; CHECK-NEXT: vpmovmskb %ymm0, %eax # encoding: [0xc5,0xfd,0xd7,0xc0]
|
||||
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
|
||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%res = call i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8> %x0)
|
||||
|
@ -2764,8 +2764,7 @@ declare i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32>)
|
||||
define i8@test_int_x86_avx512_cvtd2mask_256(<8 x i32> %x0) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vpmovd2m %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x39,0xc0]
|
||||
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: vmovmskps %ymm0, %eax # encoding: [0xc5,0xfc,0x50,0xc0]
|
||||
; CHECK-NEXT: # kill: def $al killed $al killed $eax
|
||||
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
|
||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
|
@ -420,23 +420,12 @@ define void @bitcast_8i32_store(i8* %p, <8 x i32> %a0) {
|
||||
; AVX12-NEXT: vzeroupper
|
||||
; AVX12-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: bitcast_8i32_store:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512F-NEXT: vpcmpgtd %ymm0, %ymm1, %k0
|
||||
; AVX512F-NEXT: kmovw %k0, %eax
|
||||
; AVX512F-NEXT: movb %al, (%rdi)
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: bitcast_8i32_store:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpcmpgtd %ymm0, %ymm1, %k0
|
||||
; AVX512BW-NEXT: kmovd %k0, %eax
|
||||
; AVX512BW-NEXT: movb %al, (%rdi)
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
; AVX512-LABEL: bitcast_8i32_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovmskps %ymm0, %eax
|
||||
; AVX512-NEXT: movb %al, (%rdi)
|
||||
; AVX512-NEXT: vzeroupper
|
||||
; AVX512-NEXT: retq
|
||||
%a1 = icmp slt <8 x i32> %a0, zeroinitializer
|
||||
%a2 = bitcast <8 x i1> %a1 to i8
|
||||
store i8 %a2, i8* %p
|
||||
|
@ -67,9 +67,7 @@ define i2 @bitcast_v4i32_to_v2i2(<4 x i32> %a0) nounwind {
|
||||
;
|
||||
; AVX512-LABEL: bitcast_v4i32_to_v2i2:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512-NEXT: vpcmpgtd %xmm0, %xmm1, %k0
|
||||
; AVX512-NEXT: kmovd %k0, %eax
|
||||
; AVX512-NEXT: vmovmskps %xmm0, %eax
|
||||
; AVX512-NEXT: movl %eax, %ecx
|
||||
; AVX512-NEXT: shrb $2, %cl
|
||||
; AVX512-NEXT: andb $3, %al
|
||||
@ -146,11 +144,9 @@ define i8 @bitcast_v16i8_to_v2i8(<16 x i8> %a0) nounwind {
|
||||
;
|
||||
; AVX512-LABEL: bitcast_v16i8_to_v2i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpmovb2m %xmm0, %k0
|
||||
; AVX512-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp)
|
||||
; AVX512-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm0
|
||||
; AVX512-NEXT: vmovd %xmm0, %ecx
|
||||
; AVX512-NEXT: vpextrb $1, %xmm0, %eax
|
||||
; AVX512-NEXT: vpmovmskb %xmm0, %ecx
|
||||
; AVX512-NEXT: movl %ecx, %eax
|
||||
; AVX512-NEXT: shrl $8, %eax
|
||||
; AVX512-NEXT: addb %cl, %al
|
||||
; AVX512-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX512-NEXT: retq
|
||||
@ -191,9 +187,7 @@ define i2 @bitcast_v4i64_to_v2i2(<4 x i64> %a0) nounwind {
|
||||
;
|
||||
; AVX512-LABEL: bitcast_v4i64_to_v2i2:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512-NEXT: vpcmpgtq %ymm0, %ymm1, %k0
|
||||
; AVX512-NEXT: kmovd %k0, %eax
|
||||
; AVX512-NEXT: vmovmskpd %ymm0, %eax
|
||||
; AVX512-NEXT: movl %eax, %ecx
|
||||
; AVX512-NEXT: shrb $2, %cl
|
||||
; AVX512-NEXT: andb $3, %al
|
||||
@ -235,9 +229,7 @@ define i4 @bitcast_v8i32_to_v2i4(<8 x i32> %a0) nounwind {
|
||||
;
|
||||
; AVX512-LABEL: bitcast_v8i32_to_v2i4:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512-NEXT: vpcmpgtd %ymm0, %ymm1, %k0
|
||||
; AVX512-NEXT: kmovd %k0, %eax
|
||||
; AVX512-NEXT: vmovmskps %ymm0, %eax
|
||||
; AVX512-NEXT: movl %eax, %ecx
|
||||
; AVX512-NEXT: shrb $4, %cl
|
||||
; AVX512-NEXT: andb $15, %al
|
||||
@ -338,19 +330,11 @@ define i16 @bitcast_v32i8_to_v2i16(<32 x i8> %a0) nounwind {
|
||||
;
|
||||
; AVX512-LABEL: bitcast_v32i8_to_v2i16:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: pushq %rbp
|
||||
; AVX512-NEXT: movq %rsp, %rbp
|
||||
; AVX512-NEXT: andq $-32, %rsp
|
||||
; AVX512-NEXT: subq $32, %rsp
|
||||
; AVX512-NEXT: vpmovb2m %ymm0, %k0
|
||||
; AVX512-NEXT: kmovd %k0, (%rsp)
|
||||
; AVX512-NEXT: vmovdqa (%rsp), %xmm0
|
||||
; AVX512-NEXT: vmovd %xmm0, %ecx
|
||||
; AVX512-NEXT: vpextrw $1, %xmm0, %eax
|
||||
; AVX512-NEXT: vpmovmskb %ymm0, %ecx
|
||||
; AVX512-NEXT: movl %ecx, %eax
|
||||
; AVX512-NEXT: shrl $16, %eax
|
||||
; AVX512-NEXT: addl %ecx, %eax
|
||||
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; AVX512-NEXT: movq %rbp, %rsp
|
||||
; AVX512-NEXT: popq %rbp
|
||||
; AVX512-NEXT: vzeroupper
|
||||
; AVX512-NEXT: retq
|
||||
%1 = icmp slt <32 x i8> %a0, zeroinitializer
|
||||
|
@ -29,9 +29,9 @@ define i1 @allones_v16i8_sign(<16 x i8> %arg) {
|
||||
;
|
||||
; SKX-LABEL: allones_v16i8_sign:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vpmovb2m %xmm0, %k0
|
||||
; SKX-NEXT: kortestw %k0, %k0
|
||||
; SKX-NEXT: setb %al
|
||||
; SKX-NEXT: vpmovmskb %xmm0, %eax
|
||||
; SKX-NEXT: cmpw $-1, %ax
|
||||
; SKX-NEXT: sete %al
|
||||
; SKX-NEXT: retq
|
||||
%tmp = icmp slt <16 x i8> %arg, zeroinitializer
|
||||
%tmp1 = bitcast <16 x i1> %tmp to i16
|
||||
@ -63,8 +63,8 @@ define i1 @allzeros_v16i8_sign(<16 x i8> %arg) {
|
||||
;
|
||||
; SKX-LABEL: allzeros_v16i8_sign:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vpmovb2m %xmm0, %k0
|
||||
; SKX-NEXT: kortestw %k0, %k0
|
||||
; SKX-NEXT: vpmovmskb %xmm0, %eax
|
||||
; SKX-NEXT: testw %ax, %ax
|
||||
; SKX-NEXT: sete %al
|
||||
; SKX-NEXT: retq
|
||||
%tmp = icmp slt <16 x i8> %arg, zeroinitializer
|
||||
@ -114,9 +114,9 @@ define i1 @allones_v32i8_sign(<32 x i8> %arg) {
|
||||
;
|
||||
; SKX-LABEL: allones_v32i8_sign:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vpmovb2m %ymm0, %k0
|
||||
; SKX-NEXT: kortestd %k0, %k0
|
||||
; SKX-NEXT: setb %al
|
||||
; SKX-NEXT: vpmovmskb %ymm0, %eax
|
||||
; SKX-NEXT: cmpl $-1, %eax
|
||||
; SKX-NEXT: sete %al
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
%tmp = icmp slt <32 x i8> %arg, zeroinitializer
|
||||
@ -164,8 +164,8 @@ define i1 @allzeros_v32i8_sign(<32 x i8> %arg) {
|
||||
;
|
||||
; SKX-LABEL: allzeros_v32i8_sign:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vpmovb2m %ymm0, %k0
|
||||
; SKX-NEXT: kortestd %k0, %k0
|
||||
; SKX-NEXT: vpmovmskb %ymm0, %eax
|
||||
; SKX-NEXT: testl %eax, %eax
|
||||
; SKX-NEXT: sete %al
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
@ -653,20 +653,14 @@ define i1 @allones_v4i32_sign(<4 x i32> %arg) {
|
||||
;
|
||||
; KNL-LABEL: allones_v4i32_sign:
|
||||
; KNL: # %bb.0:
|
||||
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: andb $15, %al
|
||||
; KNL-NEXT: vmovmskps %xmm0, %eax
|
||||
; KNL-NEXT: cmpb $15, %al
|
||||
; KNL-NEXT: sete %al
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: allones_v4i32_sign:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vpmovd2m %xmm0, %k0
|
||||
; SKX-NEXT: kmovd %k0, %eax
|
||||
; SKX-NEXT: vmovmskps %xmm0, %eax
|
||||
; SKX-NEXT: cmpb $15, %al
|
||||
; SKX-NEXT: sete %al
|
||||
; SKX-NEXT: retq
|
||||
@ -693,19 +687,15 @@ define i1 @allzeros_v4i32_sign(<4 x i32> %arg) {
|
||||
;
|
||||
; KNL-LABEL: allzeros_v4i32_sign:
|
||||
; KNL: # %bb.0:
|
||||
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: testb $15, %al
|
||||
; KNL-NEXT: vmovmskps %xmm0, %eax
|
||||
; KNL-NEXT: testb %al, %al
|
||||
; KNL-NEXT: sete %al
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: allzeros_v4i32_sign:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vpmovd2m %xmm0, %k0
|
||||
; SKX-NEXT: kortestb %k0, %k0
|
||||
; SKX-NEXT: vmovmskps %xmm0, %eax
|
||||
; SKX-NEXT: testb %al, %al
|
||||
; SKX-NEXT: sete %al
|
||||
; SKX-NEXT: retq
|
||||
%tmp = icmp slt <4 x i32> %arg, zeroinitializer
|
||||
@ -734,10 +724,7 @@ define i1 @allones_v8i32_sign(<8 x i32> %arg) {
|
||||
;
|
||||
; KNL-LABEL: allones_v8i32_sign:
|
||||
; KNL: # %bb.0:
|
||||
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
|
||||
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: vmovmskps %ymm0, %eax
|
||||
; KNL-NEXT: cmpb $-1, %al
|
||||
; KNL-NEXT: sete %al
|
||||
; KNL-NEXT: vzeroupper
|
||||
@ -745,9 +732,9 @@ define i1 @allones_v8i32_sign(<8 x i32> %arg) {
|
||||
;
|
||||
; SKX-LABEL: allones_v8i32_sign:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vpmovd2m %ymm0, %k0
|
||||
; SKX-NEXT: kortestb %k0, %k0
|
||||
; SKX-NEXT: setb %al
|
||||
; SKX-NEXT: vmovmskps %ymm0, %eax
|
||||
; SKX-NEXT: cmpb $-1, %al
|
||||
; SKX-NEXT: sete %al
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
%tmp = icmp slt <8 x i32> %arg, zeroinitializer
|
||||
@ -775,10 +762,7 @@ define i1 @allzeros_v8i32_sign(<8 x i32> %arg) {
|
||||
;
|
||||
; KNL-LABEL: allzeros_v8i32_sign:
|
||||
; KNL: # %bb.0:
|
||||
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
|
||||
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: vmovmskps %ymm0, %eax
|
||||
; KNL-NEXT: testb %al, %al
|
||||
; KNL-NEXT: sete %al
|
||||
; KNL-NEXT: vzeroupper
|
||||
@ -786,8 +770,8 @@ define i1 @allzeros_v8i32_sign(<8 x i32> %arg) {
|
||||
;
|
||||
; SKX-LABEL: allzeros_v8i32_sign:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vpmovd2m %ymm0, %k0
|
||||
; SKX-NEXT: kortestb %k0, %k0
|
||||
; SKX-NEXT: vmovmskps %ymm0, %eax
|
||||
; SKX-NEXT: testb %al, %al
|
||||
; SKX-NEXT: sete %al
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
@ -936,11 +920,7 @@ define i1 @allones_v4i64_sign(<4 x i64> %arg) {
|
||||
;
|
||||
; KNL-LABEL: allones_v4i64_sign:
|
||||
; KNL: # %bb.0:
|
||||
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
|
||||
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: andb $15, %al
|
||||
; KNL-NEXT: vmovmskpd %ymm0, %eax
|
||||
; KNL-NEXT: cmpb $15, %al
|
||||
; KNL-NEXT: sete %al
|
||||
; KNL-NEXT: vzeroupper
|
||||
@ -948,8 +928,7 @@ define i1 @allones_v4i64_sign(<4 x i64> %arg) {
|
||||
;
|
||||
; SKX-LABEL: allones_v4i64_sign:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vpmovq2m %ymm0, %k0
|
||||
; SKX-NEXT: kmovd %k0, %eax
|
||||
; SKX-NEXT: vmovmskpd %ymm0, %eax
|
||||
; SKX-NEXT: cmpb $15, %al
|
||||
; SKX-NEXT: sete %al
|
||||
; SKX-NEXT: vzeroupper
|
||||
@ -979,19 +958,16 @@ define i1 @allzeros_v4i64_sign(<4 x i64> %arg) {
|
||||
;
|
||||
; KNL-LABEL: allzeros_v4i64_sign:
|
||||
; KNL: # %bb.0:
|
||||
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
|
||||
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: testb $15, %al
|
||||
; KNL-NEXT: vmovmskpd %ymm0, %eax
|
||||
; KNL-NEXT: testb %al, %al
|
||||
; KNL-NEXT: sete %al
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: allzeros_v4i64_sign:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vpmovq2m %ymm0, %k0
|
||||
; SKX-NEXT: kortestb %k0, %k0
|
||||
; SKX-NEXT: vmovmskpd %ymm0, %eax
|
||||
; SKX-NEXT: testb %al, %al
|
||||
; SKX-NEXT: sete %al
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
@ -3924,19 +3900,12 @@ define i32 @movmskpd(<2 x double> %x) {
|
||||
;
|
||||
; KNL-LABEL: movmskpd:
|
||||
; KNL: # %bb.0:
|
||||
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: andl $3, %eax
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: vmovmskpd %xmm0, %eax
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: movmskpd:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vpmovq2m %xmm0, %k0
|
||||
; SKX-NEXT: kmovd %k0, %eax
|
||||
; SKX-NEXT: andl $3, %eax
|
||||
; SKX-NEXT: vmovmskpd %xmm0, %eax
|
||||
; SKX-NEXT: retq
|
||||
%a = bitcast <2 x double> %x to <2 x i64>
|
||||
%b = icmp slt <2 x i64> %a, zeroinitializer
|
||||
@ -3958,19 +3927,12 @@ define i32 @movmskps(<4 x float> %x) {
|
||||
;
|
||||
; KNL-LABEL: movmskps:
|
||||
; KNL: # %bb.0:
|
||||
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: andl $15, %eax
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: vmovmskps %xmm0, %eax
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: movmskps:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vpmovd2m %xmm0, %k0
|
||||
; SKX-NEXT: kmovd %k0, %eax
|
||||
; SKX-NEXT: andl $15, %eax
|
||||
; SKX-NEXT: vmovmskps %xmm0, %eax
|
||||
; SKX-NEXT: retq
|
||||
%a = bitcast <4 x float> %x to <4 x i32>
|
||||
%b = icmp slt <4 x i32> %a, zeroinitializer
|
||||
@ -3994,19 +3956,13 @@ define i32 @movmskpd256(<4 x double> %x) {
|
||||
;
|
||||
; KNL-LABEL: movmskpd256:
|
||||
; KNL: # %bb.0:
|
||||
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
|
||||
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: andl $15, %eax
|
||||
; KNL-NEXT: vmovmskpd %ymm0, %eax
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: movmskpd256:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vpmovq2m %ymm0, %k0
|
||||
; SKX-NEXT: kmovd %k0, %eax
|
||||
; SKX-NEXT: andl $15, %eax
|
||||
; SKX-NEXT: vmovmskpd %ymm0, %eax
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
%a = bitcast <4 x double> %x to <4 x i64>
|
||||
@ -4033,18 +3989,13 @@ define i32 @movmskps256(<8 x float> %x) {
|
||||
;
|
||||
; KNL-LABEL: movmskps256:
|
||||
; KNL: # %bb.0:
|
||||
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
|
||||
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: movzbl %al, %eax
|
||||
; KNL-NEXT: vmovmskps %ymm0, %eax
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: movmskps256:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vpmovd2m %ymm0, %k0
|
||||
; SKX-NEXT: kmovb %k0, %eax
|
||||
; SKX-NEXT: vmovmskps %ymm0, %eax
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
%a = bitcast <8 x float> %x to <8 x i32>
|
||||
@ -4072,8 +4023,7 @@ define i32 @movmskb(<16 x i8> %x) {
|
||||
;
|
||||
; SKX-LABEL: movmskb:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vpmovb2m %xmm0, %k0
|
||||
; SKX-NEXT: kmovw %k0, %eax
|
||||
; SKX-NEXT: vpmovmskb %xmm0, %eax
|
||||
; SKX-NEXT: retq
|
||||
%a = icmp slt <16 x i8> %x, zeroinitializer
|
||||
%b = bitcast <16 x i1> %a to i16
|
||||
@ -4114,8 +4064,7 @@ define i32 @movmskb256(<32 x i8> %x) {
|
||||
;
|
||||
; SKX-LABEL: movmskb256:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vpmovb2m %ymm0, %k0
|
||||
; SKX-NEXT: kmovd %k0, %eax
|
||||
; SKX-NEXT: vpmovmskb %ymm0, %eax
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
%a = icmp slt <32 x i8> %x, zeroinitializer
|
||||
|
Loading…
x
Reference in New Issue
Block a user