1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-20 03:23:01 +02:00

[X86] Add a DAG combine to widen (i4 (bitcast (v4i1))) before type legalization sees the i4 and changes it to a load/store.

Same for v2i1 and i2.

llvm-svn: 321602
This commit is contained in:
Craig Topper 2017-12-31 09:50:38 +00:00
parent e8a65f8f62
commit 98a2e44b5b
6 changed files with 146 additions and 148 deletions

View File

@ -30426,6 +30426,18 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, N0,
DAG.getIntPtrConstant(0, dl));
}
// If this is a bitcast from an MVT::v4i1/v2i1 to an illegal integer type,
// widen both sides (via v8i1 and i8) and truncate the result to avoid a trip
// through memory.
if ((SrcVT == MVT::v4i1 || SrcVT == MVT::v2i1) && VT.isScalarInteger() &&
Subtarget.hasVLX()) {
SDLoc dl(N);
N0 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i1,
DAG.getUNDEF(MVT::v8i1), N0,
DAG.getIntPtrConstant(0, dl));
N0 = DAG.getBitcast(MVT::i8, N0);
return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
}
}
// Since MMX types are special and don't usually play with other vector types,

View File

@ -5333,8 +5333,9 @@ define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b)
; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: andb $3, %al
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask:
@ -5361,8 +5362,9 @@ define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %
; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: andb $3, %al
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem:
@ -5391,8 +5393,9 @@ define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: andb $3, %al
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask:
@ -5430,8 +5433,9 @@ define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: andb $3, %al
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem:
@ -5470,8 +5474,9 @@ define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b)
; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: andb $3, %al
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b:
@ -5502,8 +5507,9 @@ define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: andb $3, %al
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b:
@ -14443,8 +14449,9 @@ define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b)
; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: andb $3, %al
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask:
@ -14471,8 +14478,9 @@ define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>*
; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: andb $3, %al
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem:
@ -14501,8 +14509,9 @@ define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i6
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: andb $3, %al
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask:
@ -14540,8 +14549,9 @@ define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: andb $3, %al
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem:
@ -14580,8 +14590,9 @@ define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b
; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: andb $3, %al
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
@ -14612,8 +14623,9 @@ define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: andb $3, %al
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
@ -23681,8 +23693,9 @@ define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b)
; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: andb $3, %al
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask:
@ -23711,8 +23724,9 @@ define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>*
; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: andb $3, %al
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem:
@ -23744,8 +23758,9 @@ define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i6
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: andb $3, %al
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask:
@ -23783,8 +23798,9 @@ define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: andb $3, %al
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem:
@ -23824,8 +23840,9 @@ define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b
; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: andb $3, %al
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
@ -23858,8 +23875,9 @@ define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: andb $3, %al
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
@ -33075,8 +33093,9 @@ define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b)
; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: andb $3, %al
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask:
@ -33106,8 +33125,9 @@ define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>*
; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: andb $3, %al
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem:
@ -33139,8 +33159,9 @@ define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i6
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: andb $3, %al
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask:
@ -33181,8 +33202,9 @@ define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: andb $3, %al
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem:
@ -33224,8 +33246,9 @@ define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b
; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: andb $3, %al
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b:
@ -33259,8 +33282,9 @@ define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: andb $3, %al
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b:
@ -39955,8 +39979,9 @@ define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b)
; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: andb $3, %al
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask:
@ -39983,8 +40008,9 @@ define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>*
; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: andb $3, %al
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem:
@ -40012,8 +40038,9 @@ define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, double* %
; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: andb $3, %al
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
@ -40044,8 +40071,9 @@ define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask(i2 zeroext %__u, <2 x i6
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: andb $3, %al
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask:
@ -40078,8 +40106,9 @@ define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem(i2 zeroext %__u, <2
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: andb $3, %al
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem:
@ -40113,8 +40142,9 @@ define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b(i2 zeroext %__u, <
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: andb $3, %al
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b:

View File

@ -80,8 +80,7 @@ define i4 @v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
; AVX512F-NEXT: vpcmpgtd %xmm1, %xmm0, %k1
; AVX512F-NEXT: vpcmpgtd %xmm3, %xmm2, %k0 {%k1}
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v4i32:
@ -89,8 +88,7 @@ define i4 @v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm0, %k1
; AVX512BW-NEXT: vpcmpgtd %xmm3, %xmm2, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512BW-NEXT: # kill: def %al killed %al killed %eax
; AVX512BW-NEXT: retq
%x0 = icmp sgt <4 x i32> %a, %b
%x1 = icmp sgt <4 x i32> %c, %d
@ -123,8 +121,7 @@ define i4 @v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d)
; AVX512F-NEXT: vcmpltps %xmm0, %xmm1, %k1
; AVX512F-NEXT: vcmpltps %xmm2, %xmm3, %k0 {%k1}
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v4f32:
@ -132,8 +129,7 @@ define i4 @v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d)
; AVX512BW-NEXT: vcmpltps %xmm0, %xmm1, %k1
; AVX512BW-NEXT: vcmpltps %xmm2, %xmm3, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512BW-NEXT: # kill: def %al killed %al killed %eax
; AVX512BW-NEXT: retq
%x0 = fcmp ogt <4 x float> %a, %b
%x1 = fcmp ogt <4 x float> %c, %d
@ -318,8 +314,7 @@ define i2 @v2i8(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) {
; AVX512F-NEXT: vpcmpgtq %xmm1, %xmm0, %k1
; AVX512F-NEXT: vpcmpgtq %xmm3, %xmm2, %k0 {%k1}
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v2i8:
@ -335,8 +330,7 @@ define i2 @v2i8(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) {
; AVX512BW-NEXT: vpcmpgtq %xmm1, %xmm0, %k1
; AVX512BW-NEXT: vpcmpgtq %xmm3, %xmm2, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512BW-NEXT: # kill: def %al killed %al killed %eax
; AVX512BW-NEXT: retq
%x0 = icmp sgt <2 x i8> %a, %b
%x1 = icmp sgt <2 x i8> %c, %d
@ -473,8 +467,7 @@ define i2 @v2i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x i16> %d) {
; AVX512F-NEXT: vpcmpgtq %xmm1, %xmm0, %k1
; AVX512F-NEXT: vpcmpgtq %xmm3, %xmm2, %k0 {%k1}
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v2i16:
@ -490,8 +483,7 @@ define i2 @v2i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x i16> %d) {
; AVX512BW-NEXT: vpcmpgtq %xmm1, %xmm0, %k1
; AVX512BW-NEXT: vpcmpgtq %xmm3, %xmm2, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512BW-NEXT: # kill: def %al killed %al killed %eax
; AVX512BW-NEXT: retq
%x0 = icmp sgt <2 x i16> %a, %b
%x1 = icmp sgt <2 x i16> %c, %d
@ -612,8 +604,7 @@ define i2 @v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d) {
; AVX512F-NEXT: vpcmpgtq %xmm1, %xmm0, %k1
; AVX512F-NEXT: vpcmpgtq %xmm3, %xmm2, %k0 {%k1}
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v2i32:
@ -629,8 +620,7 @@ define i2 @v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d) {
; AVX512BW-NEXT: vpcmpgtq %xmm1, %xmm0, %k1
; AVX512BW-NEXT: vpcmpgtq %xmm3, %xmm2, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512BW-NEXT: # kill: def %al killed %al killed %eax
; AVX512BW-NEXT: retq
%x0 = icmp sgt <2 x i32> %a, %b
%x1 = icmp sgt <2 x i32> %c, %d
@ -682,8 +672,7 @@ define i2 @v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d) {
; AVX512F-NEXT: vpcmpgtq %xmm1, %xmm0, %k1
; AVX512F-NEXT: vpcmpgtq %xmm3, %xmm2, %k0 {%k1}
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v2i64:
@ -691,8 +680,7 @@ define i2 @v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d) {
; AVX512BW-NEXT: vpcmpgtq %xmm1, %xmm0, %k1
; AVX512BW-NEXT: vpcmpgtq %xmm3, %xmm2, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512BW-NEXT: # kill: def %al killed %al killed %eax
; AVX512BW-NEXT: retq
%x0 = icmp sgt <2 x i64> %a, %b
%x1 = icmp sgt <2 x i64> %c, %d
@ -725,8 +713,7 @@ define i2 @v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double>
; AVX512F-NEXT: vcmpltpd %xmm0, %xmm1, %k1
; AVX512F-NEXT: vcmpltpd %xmm2, %xmm3, %k0 {%k1}
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v2f64:
@ -734,8 +721,7 @@ define i2 @v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double>
; AVX512BW-NEXT: vcmpltpd %xmm0, %xmm1, %k1
; AVX512BW-NEXT: vcmpltpd %xmm2, %xmm3, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512BW-NEXT: # kill: def %al killed %al killed %eax
; AVX512BW-NEXT: retq
%x0 = fcmp ogt <2 x double> %a, %b
%x1 = fcmp ogt <2 x double> %c, %d
@ -792,8 +778,7 @@ define i4 @v4i8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> %d) {
; AVX512F-NEXT: vpcmpgtd %xmm1, %xmm0, %k1
; AVX512F-NEXT: vpcmpgtd %xmm3, %xmm2, %k0 {%k1}
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v4i8:
@ -809,8 +794,7 @@ define i4 @v4i8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> %d) {
; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm0, %k1
; AVX512BW-NEXT: vpcmpgtd %xmm3, %xmm2, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512BW-NEXT: # kill: def %al killed %al killed %eax
; AVX512BW-NEXT: retq
%x0 = icmp sgt <4 x i8> %a, %b
%x1 = icmp sgt <4 x i8> %c, %d
@ -867,8 +851,7 @@ define i4 @v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
; AVX512F-NEXT: vpcmpgtd %xmm1, %xmm0, %k1
; AVX512F-NEXT: vpcmpgtd %xmm3, %xmm2, %k0 {%k1}
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v4i16:
@ -884,8 +867,7 @@ define i4 @v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm0, %k1
; AVX512BW-NEXT: vpcmpgtd %xmm3, %xmm2, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512BW-NEXT: # kill: def %al killed %al killed %eax
; AVX512BW-NEXT: retq
%x0 = icmp sgt <4 x i16> %a, %b
%x1 = icmp sgt <4 x i16> %c, %d

View File

@ -94,8 +94,7 @@ define i4 @v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
; AVX512F-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
; AVX512F-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
@ -104,8 +103,7 @@ define i4 @v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
; AVX512BW-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
; AVX512BW-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512BW-NEXT: # kill: def %al killed %al killed %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
%x0 = icmp sgt <4 x i64> %a, %b
@ -148,8 +146,7 @@ define i4 @v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double>
; AVX512F-NEXT: vcmpltpd %ymm0, %ymm1, %k1
; AVX512F-NEXT: vcmpltpd %ymm2, %ymm3, %k0 {%k1}
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
@ -158,8 +155,7 @@ define i4 @v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double>
; AVX512BW-NEXT: vcmpltpd %ymm0, %ymm1, %k1
; AVX512BW-NEXT: vcmpltpd %ymm2, %ymm3, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512BW-NEXT: # kill: def %al killed %al killed %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
%x0 = fcmp ogt <4 x double> %a, %b

View File

@ -64,16 +64,14 @@ define i4 @v4i32(<4 x i32> %a, <4 x i32> %b) {
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v4i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512BW-NEXT: # kill: def %al killed %al killed %eax
; AVX512BW-NEXT: retq
%x = icmp sgt <4 x i32> %a, %b
%res = bitcast <4 x i1> %x to i4
@ -99,16 +97,14 @@ define i4 @v4f32(<4 x float> %a, <4 x float> %b) {
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcmpltps %xmm0, %xmm1, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v4f32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vcmpltps %xmm0, %xmm1, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512BW-NEXT: # kill: def %al killed %al killed %eax
; AVX512BW-NEXT: retq
%x = fcmp ogt <4 x float> %a, %b
%res = bitcast <4 x i1> %x to i4
@ -226,8 +222,7 @@ define i2 @v2i8(<2 x i8> %a, <2 x i8> %b) {
; AVX512F-NEXT: vpsraq $56, %xmm0, %xmm0
; AVX512F-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v2i8:
@ -238,8 +233,7 @@ define i2 @v2i8(<2 x i8> %a, <2 x i8> %b) {
; AVX512BW-NEXT: vpsraq $56, %xmm0, %xmm0
; AVX512BW-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512BW-NEXT: # kill: def %al killed %al killed %eax
; AVX512BW-NEXT: retq
%x = icmp sgt <2 x i8> %a, %b
%res = bitcast <2 x i1> %x to i2
@ -320,8 +314,7 @@ define i2 @v2i16(<2 x i16> %a, <2 x i16> %b) {
; AVX512F-NEXT: vpsraq $48, %xmm0, %xmm0
; AVX512F-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v2i16:
@ -332,8 +325,7 @@ define i2 @v2i16(<2 x i16> %a, <2 x i16> %b) {
; AVX512BW-NEXT: vpsraq $48, %xmm0, %xmm0
; AVX512BW-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512BW-NEXT: # kill: def %al killed %al killed %eax
; AVX512BW-NEXT: retq
%x = icmp sgt <2 x i16> %a, %b
%res = bitcast <2 x i1> %x to i2
@ -406,8 +398,7 @@ define i2 @v2i32(<2 x i32> %a, <2 x i32> %b) {
; AVX512F-NEXT: vpsraq $32, %xmm0, %xmm0
; AVX512F-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v2i32:
@ -418,8 +409,7 @@ define i2 @v2i32(<2 x i32> %a, <2 x i32> %b) {
; AVX512BW-NEXT: vpsraq $32, %xmm0, %xmm0
; AVX512BW-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512BW-NEXT: # kill: def %al killed %al killed %eax
; AVX512BW-NEXT: retq
%x = icmp sgt <2 x i32> %a, %b
%res = bitcast <2 x i1> %x to i2
@ -455,16 +445,14 @@ define i2 @v2i64(<2 x i64> %a, <2 x i64> %b) {
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v2i64:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512BW-NEXT: # kill: def %al killed %al killed %eax
; AVX512BW-NEXT: retq
%x = icmp sgt <2 x i64> %a, %b
%res = bitcast <2 x i1> %x to i2
@ -490,16 +478,14 @@ define i2 @v2f64(<2 x double> %a, <2 x double> %b) {
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcmpltpd %xmm0, %xmm1, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v2f64:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vcmpltpd %xmm0, %xmm1, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512BW-NEXT: # kill: def %al killed %al killed %eax
; AVX512BW-NEXT: retq
%x = fcmp ogt <2 x double> %a, %b
%res = bitcast <2 x i1> %x to i2
@ -537,8 +523,7 @@ define i4 @v4i8(<4 x i8> %a, <4 x i8> %b) {
; AVX512F-NEXT: vpsrad $24, %xmm0, %xmm0
; AVX512F-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v4i8:
@ -549,8 +534,7 @@ define i4 @v4i8(<4 x i8> %a, <4 x i8> %b) {
; AVX512BW-NEXT: vpsrad $24, %xmm0, %xmm0
; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512BW-NEXT: # kill: def %al killed %al killed %eax
; AVX512BW-NEXT: retq
%x = icmp sgt <4 x i8> %a, %b
%res = bitcast <4 x i1> %x to i4
@ -588,8 +572,7 @@ define i4 @v4i16(<4 x i16> %a, <4 x i16> %b) {
; AVX512F-NEXT: vpsrad $16, %xmm0, %xmm0
; AVX512F-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v4i16:
@ -600,8 +583,7 @@ define i4 @v4i16(<4 x i16> %a, <4 x i16> %b) {
; AVX512BW-NEXT: vpsrad $16, %xmm0, %xmm0
; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512BW-NEXT: # kill: def %al killed %al killed %eax
; AVX512BW-NEXT: retq
%x = icmp sgt <4 x i16> %a, %b
%res = bitcast <4 x i1> %x to i4

View File

@ -271,8 +271,7 @@ define i4 @v4i64(<4 x i64> %a, <4 x i64> %b) {
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
@ -280,8 +279,7 @@ define i4 @v4i64(<4 x i64> %a, <4 x i64> %b) {
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512BW-NEXT: # kill: def %al killed %al killed %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
%x = icmp sgt <4 x i64> %a, %b
@ -311,8 +309,7 @@ define i4 @v4f64(<4 x double> %a, <4 x double> %b) {
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcmpltpd %ymm0, %ymm1, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
@ -320,8 +317,7 @@ define i4 @v4f64(<4 x double> %a, <4 x double> %b) {
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vcmpltpd %ymm0, %ymm1, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512BW-NEXT: # kill: def %al killed %al killed %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
%x = fcmp ogt <4 x double> %a, %b