1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

[X86] Add a DAG combine to combine (sext (setcc)) with VLX

Normally the target-independent DAG combiner would do this combine based on getSetCCResultType, but with VLX getSetCCResultType returns a vXi1 type, which prevents that DAG combine from kicking in.

But doing this combine can allow us to remove the explicit sign extend that would otherwise be emitted.

This patch adds a target specific DAG combine to combine the sext+setcc when the result type is the same size as the input to the setcc. I've restricted this to FP compares and things that can be represented with PCMPEQ and PCMPGT since we don't have full integer compare support on the older ISAs.

Differential Revision: https://reviews.llvm.org/D41850

llvm-svn: 322101
This commit is contained in:
Craig Topper 2018-01-09 18:14:22 +00:00
parent b38fcb7fd8
commit 3d19c1e4f2
11 changed files with 149 additions and 264 deletions

View File

@ -35996,6 +35996,45 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
// Try to fold an extend of a vector setcc, (ext (setcc X, Y, CC)), into a
// single setcc that directly produces the extended xmm/ymm-sized result type.
//
// Returns the replacement setcc, or an empty SDValue when the fold does not
// apply. N's own opcode is not inspected here; the caller decides which
// extend nodes get passed in.
static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG,
                               const X86Subtarget &Subtarget) {
  EVT VT = N->getValueType(0);
  SDValue Cmp = N->getOperand(0);

  // The fold is limited to AVX512 subtargets ...
  if (!Subtarget.hasAVX512())
    return SDValue();
  // ... to vector result types ...
  if (!VT.isVector())
    return SDValue();
  // ... and to extends whose operand is a setcc.
  if (Cmp->getOpcode() != ISD::SETCC)
    return SDValue();

  // The result element type must be one of the legal scalar element types.
  EVT EltVT = VT.getVectorElementType();
  const bool IsLegalElt = EltVT == MVT::i8 || EltVT == MVT::i16 ||
                          EltVT == MVT::i32 || EltVT == MVT::i64 ||
                          EltVT == MVT::f32 || EltVT == MVT::f64;
  if (!IsLegalElt)
    return SDValue();

  // Results wider than 256 bits are not handled.
  const unsigned ResultBits = VT.getSizeInBits();
  if (ResultBits > 256)
    return SDValue();

  // PCMPEQ/PCMPGT are the only integer compares available for this form, so
  // give up on any condition code they cannot express: everything flagged by
  // isUnsignedIntSetCC, plus SETLE, SETGE and SETNE.
  const ISD::CondCode CC = cast<CondCodeSDNode>(Cmp->getOperand(2))->get();
  if (ISD::isUnsignedIntSetCC(CC))
    return SDValue();
  switch (CC) {
  case ISD::SETLE:
  case ISD::SETGE:
  case ISD::SETNE:
    return SDValue();
  default:
    break;
  }

  // The compare operands, viewed as an integer vector, must be exactly as wide
  // as the extended result; otherwise the extend is not fully absorbed by the
  // new setcc.
  SDValue LHS = Cmp.getOperand(0);
  SDValue RHS = Cmp.getOperand(1);
  EVT OpIntVT = LHS.getValueType().changeVectorElementTypeToInteger();
  if (OpIntVT.getSizeInBits() != ResultBits)
    return SDValue();

  return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, CC);
}
static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@ -36013,6 +36052,9 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
if (!DCI.isBeforeLegalizeOps())
return SDValue();
if (SDValue V = combineExtSetcc(N, DAG, Subtarget))
return V;
if (InVT == MVT::i1 && N0.getOpcode() == ISD::XOR &&
isAllOnesConstant(N0.getOperand(1)) && N0.hasOneUse()) {
// Invert and sign-extend a boolean is the same as zero-extend and subtract

View File

@ -947,11 +947,9 @@ define <4 x double> @test_cmppd(<4 x double> %a0, <4 x double> %a1, <4 x double>
;
; SKX-LABEL: test_cmppd:
; SKX: # %bb.0:
; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 # sched: [3:1.00]
; SKX-NEXT: vcmpeqpd (%rdi), %ymm0, %k1 # sched: [10:1.00]
; SKX-NEXT: vpmovm2q %k0, %ymm0 # sched: [1:0.25]
; SKX-NEXT: vpmovm2q %k1, %ymm1 # sched: [1:0.25]
; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [4:0.33]
; SKX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_cmppd:
@ -1015,11 +1013,9 @@ define <8 x float> @test_cmpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a
;
; SKX-LABEL: test_cmpps:
; SKX: # %bb.0:
; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 # sched: [3:1.00]
; SKX-NEXT: vcmpeqps (%rdi), %ymm0, %k1 # sched: [10:1.00]
; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25]
; SKX-NEXT: vpmovm2d %k1, %ymm1 # sched: [1:0.25]
; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [4:0.33]
; SKX-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_cmpps:

View File

@ -2159,10 +2159,8 @@ define <32 x i8> @test_pcmpeqb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
;
; SKX-LABEL: test_pcmpeqb:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # sched: [3:1.00]
; SKX-NEXT: vpmovm2b %k0, %ymm0 # sched: [1:0.25]
; SKX-NEXT: vpcmpeqb (%rdi), %ymm0, %k0 # sched: [10:1.00]
; SKX-NEXT: vpmovm2b %k0, %ymm0 # sched: [1:0.25]
; SKX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pcmpeqb:
@ -2205,10 +2203,8 @@ define <8 x i32> @test_pcmpeqd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
;
; SKX-LABEL: test_pcmpeqd:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # sched: [3:1.00]
; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25]
; SKX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0 # sched: [10:1.00]
; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25]
; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pcmpeqd:
@ -2251,10 +2247,8 @@ define <4 x i64> @test_pcmpeqq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
;
; SKX-LABEL: test_pcmpeqq:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 # sched: [3:1.00]
; SKX-NEXT: vpmovm2q %k0, %ymm0 # sched: [1:0.25]
; SKX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0 # sched: [10:1.00]
; SKX-NEXT: vpmovm2q %k0, %ymm0 # sched: [1:0.25]
; SKX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pcmpeqq:
@ -2297,10 +2291,8 @@ define <16 x i16> @test_pcmpeqw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2)
;
; SKX-LABEL: test_pcmpeqw:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 # sched: [3:1.00]
; SKX-NEXT: vpmovm2w %k0, %ymm0 # sched: [1:0.25]
; SKX-NEXT: vpcmpeqw (%rdi), %ymm0, %k0 # sched: [10:1.00]
; SKX-NEXT: vpmovm2w %k0, %ymm0 # sched: [1:0.25]
; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pcmpeqw:
@ -2343,10 +2335,8 @@ define <32 x i8> @test_pcmpgtb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
;
; SKX-LABEL: test_pcmpgtb:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 # sched: [3:1.00]
; SKX-NEXT: vpmovm2b %k0, %ymm0 # sched: [1:0.25]
; SKX-NEXT: vpcmpgtb (%rdi), %ymm0, %k0 # sched: [10:1.00]
; SKX-NEXT: vpmovm2b %k0, %ymm0 # sched: [1:0.25]
; SKX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pcmpgtb:
@ -2389,10 +2379,8 @@ define <8 x i32> @test_pcmpgtd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
;
; SKX-LABEL: test_pcmpgtd:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 # sched: [3:1.00]
; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25]
; SKX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0 # sched: [10:1.00]
; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25]
; SKX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pcmpgtd:
@ -2435,10 +2423,8 @@ define <4 x i64> @test_pcmpgtq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
;
; SKX-LABEL: test_pcmpgtq:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 # sched: [3:1.00]
; SKX-NEXT: vpmovm2q %k0, %ymm0 # sched: [1:0.25]
; SKX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0 # sched: [10:1.00]
; SKX-NEXT: vpmovm2q %k0, %ymm0 # sched: [1:0.25]
; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SKX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pcmpgtq:
@ -2481,10 +2467,8 @@ define <16 x i16> @test_pcmpgtw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2)
;
; SKX-LABEL: test_pcmpgtw:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 # sched: [3:1.00]
; SKX-NEXT: vpmovm2w %k0, %ymm0 # sched: [1:0.25]
; SKX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0 # sched: [10:1.00]
; SKX-NEXT: vpmovm2w %k0, %ymm0 # sched: [1:0.25]
; SKX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pcmpgtw:

View File

@ -848,22 +848,13 @@ define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x
}
define <4 x i32> @test44(<4 x i16> %x, <4 x i16> %y) #0 {
; KNL-LABEL: test44:
; KNL: ## %bb.0:
; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; KNL-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; KNL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; KNL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test44:
; SKX: ## %bb.0:
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; SKX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
; SKX-NEXT: vpmovm2d %k0, %xmm0
; SKX-NEXT: retq
; CHECK-LABEL: test44:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; CHECK-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%mask = icmp eq <4 x i16> %x, %y
%1 = sext <4 x i1> %mask to <4 x i32>
ret <4 x i32> %1

View File

@ -9,14 +9,13 @@ define <3 x i8 > @foo(<3 x i8>%x, <3 x i8>%a, <3 x i8>%b) {
; CHECK-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
; CHECK-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0
; CHECK-NEXT: vpslld $24, %xmm0, %xmm0
; CHECK-NEXT: vpsrad $24, %xmm0, %xmm0
; CHECK-NEXT: vmovd %ecx, %xmm1
; CHECK-NEXT: vpinsrd $1, %r8d, %xmm1, %xmm1
; CHECK-NEXT: vpsrad $24, %xmm0, %xmm0
; CHECK-NEXT: vpinsrd $2, %r9d, %xmm1, %xmm1
; CHECK-NEXT: vpslld $24, %xmm1, %xmm1
; CHECK-NEXT: vpsrad $24, %xmm1, %xmm1
; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %k0
; CHECK-NEXT: vpmovm2d %k0, %xmm0
; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: vpextrb $0, %xmm0, %eax
; CHECK-NEXT: vpextrb $4, %xmm0, %edx
; CHECK-NEXT: vpextrb $8, %xmm0, %ecx

View File

@ -21,9 +21,7 @@ define <4 x i32> @commute_cmpps_eq(<4 x float>* %a0, <4 x float> %a1) {
;
; AVX512-LABEL: commute_cmpps_eq:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpeqps (%rdi), %xmm0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a0
%2 = fcmp oeq <4 x float> %1, %a1
@ -44,9 +42,7 @@ define <4 x i32> @commute_cmpps_ne(<4 x float>* %a0, <4 x float> %a1) {
;
; AVX512-LABEL: commute_cmpps_ne:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpneqps (%rdi), %xmm0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: vcmpneqps (%rdi), %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a0
%2 = fcmp une <4 x float> %1, %a1
@ -67,9 +63,7 @@ define <4 x i32> @commute_cmpps_ord(<4 x float>* %a0, <4 x float> %a1) {
;
; AVX512-LABEL: commute_cmpps_ord:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpordps (%rdi), %xmm0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: vcmpordps (%rdi), %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a0
%2 = fcmp ord <4 x float> %1, %a1
@ -90,9 +84,7 @@ define <4 x i32> @commute_cmpps_uno(<4 x float>* %a0, <4 x float> %a1) {
;
; AVX512-LABEL: commute_cmpps_uno:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpunordps (%rdi), %xmm0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: vcmpunordps (%rdi), %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a0
%2 = fcmp uno <4 x float> %1, %a1
@ -117,9 +109,7 @@ define <4 x i32> @commute_cmpps_ueq(<4 x float>* %a0, <4 x float> %a1) {
;
; AVX512-LABEL: commute_cmpps_ueq:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpeq_uqps (%rdi), %xmm0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: vcmpeq_uqps (%rdi), %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a0
%2 = fcmp ueq <4 x float> %1, %a1
@ -144,9 +134,7 @@ define <4 x i32> @commute_cmpps_one(<4 x float>* %a0, <4 x float> %a1) {
;
; AVX512-LABEL: commute_cmpps_one:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpneq_oqps (%rdi), %xmm0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: vcmpneq_oqps (%rdi), %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a0
%2 = fcmp one <4 x float> %1, %a1
@ -171,9 +159,7 @@ define <4 x i32> @commute_cmpps_lt(<4 x float>* %a0, <4 x float> %a1) {
; AVX512-LABEL: commute_cmpps_lt:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovaps (%rdi), %xmm1
; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a0
%2 = fcmp olt <4 x float> %1, %a1
@ -198,9 +184,7 @@ define <4 x i32> @commute_cmpps_le(<4 x float>* %a0, <4 x float> %a1) {
; AVX512-LABEL: commute_cmpps_le:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovaps (%rdi), %xmm1
; AVX512-NEXT: vcmpleps %xmm0, %xmm1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: vcmpleps %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a0
%2 = fcmp ole <4 x float> %1, %a1
@ -222,9 +206,7 @@ define <8 x i32> @commute_cmpps_eq_ymm(<8 x float>* %a0, <8 x float> %a1) {
;
; AVX512-LABEL: commute_cmpps_eq_ymm:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpeqps (%rdi), %ymm0, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %a0
%2 = fcmp oeq <8 x float> %1, %a1
@ -246,9 +228,7 @@ define <8 x i32> @commute_cmpps_ne_ymm(<8 x float>* %a0, <8 x float> %a1) {
;
; AVX512-LABEL: commute_cmpps_ne_ymm:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpneqps (%rdi), %ymm0, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: vcmpneqps (%rdi), %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %a0
%2 = fcmp une <8 x float> %1, %a1
@ -270,9 +250,7 @@ define <8 x i32> @commute_cmpps_ord_ymm(<8 x float>* %a0, <8 x float> %a1) {
;
; AVX512-LABEL: commute_cmpps_ord_ymm:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpordps (%rdi), %ymm0, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: vcmpordps (%rdi), %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %a0
%2 = fcmp ord <8 x float> %1, %a1
@ -294,9 +272,7 @@ define <8 x i32> @commute_cmpps_uno_ymm(<8 x float>* %a0, <8 x float> %a1) {
;
; AVX512-LABEL: commute_cmpps_uno_ymm:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpunordps (%rdi), %ymm0, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: vcmpunordps (%rdi), %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %a0
%2 = fcmp uno <8 x float> %1, %a1
@ -326,9 +302,7 @@ define <8 x i32> @commute_cmpps_ueq_ymm(<8 x float>* %a0, <8 x float> %a1) {
;
; AVX512-LABEL: commute_cmpps_ueq_ymm:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpeq_uqps (%rdi), %ymm0, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: vcmpeq_uqps (%rdi), %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %a0
%2 = fcmp ueq <8 x float> %1, %a1
@ -358,9 +332,7 @@ define <8 x i32> @commute_cmpps_one_ymm(<8 x float>* %a0, <8 x float> %a1) {
;
; AVX512-LABEL: commute_cmpps_one_ymm:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpneq_oqps (%rdi), %ymm0, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: vcmpneq_oqps (%rdi), %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %a0
%2 = fcmp one <8 x float> %1, %a1
@ -388,9 +360,7 @@ define <8 x i32> @commute_cmpps_lt_ymm(<8 x float>* %a0, <8 x float> %a1) {
; AVX512-LABEL: commute_cmpps_lt_ymm:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovaps (%rdi), %ymm1
; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %a0
%2 = fcmp olt <8 x float> %1, %a1
@ -418,9 +388,7 @@ define <8 x i32> @commute_cmpps_le_ymm(<8 x float>* %a0, <8 x float> %a1) {
; AVX512-LABEL: commute_cmpps_le_ymm:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovaps (%rdi), %ymm1
; AVX512-NEXT: vcmpleps %ymm0, %ymm1, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: vcmpleps %ymm0, %ymm1, %ymm0
; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %a0
%2 = fcmp ole <8 x float> %1, %a1
@ -446,9 +414,7 @@ define <2 x i64> @commute_cmppd_eq(<2 x double>* %a0, <2 x double> %a1) {
;
; AVX512-LABEL: commute_cmppd_eq:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpeqpd (%rdi), %xmm0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %a0
%2 = fcmp oeq <2 x double> %1, %a1
@ -469,9 +435,7 @@ define <2 x i64> @commute_cmppd_ne(<2 x double>* %a0, <2 x double> %a1) {
;
; AVX512-LABEL: commute_cmppd_ne:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpneqpd (%rdi), %xmm0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: vcmpneqpd (%rdi), %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %a0
%2 = fcmp une <2 x double> %1, %a1
@ -492,9 +456,7 @@ define <2 x i64> @commute_cmppd_ord(<2 x double>* %a0, <2 x double> %a1) {
;
; AVX512-LABEL: commute_cmppd_ord:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpordpd (%rdi), %xmm0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: vcmpordpd (%rdi), %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %a0
%2 = fcmp ord <2 x double> %1, %a1
@ -519,9 +481,7 @@ define <2 x i64> @commute_cmppd_ueq(<2 x double>* %a0, <2 x double> %a1) {
;
; AVX512-LABEL: commute_cmppd_ueq:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpeq_uqpd (%rdi), %xmm0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: vcmpeq_uqpd (%rdi), %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %a0
%2 = fcmp ueq <2 x double> %1, %a1
@ -546,9 +506,7 @@ define <2 x i64> @commute_cmppd_one(<2 x double>* %a0, <2 x double> %a1) {
;
; AVX512-LABEL: commute_cmppd_one:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpneq_oqpd (%rdi), %xmm0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: vcmpneq_oqpd (%rdi), %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %a0
%2 = fcmp one <2 x double> %1, %a1
@ -569,9 +527,7 @@ define <2 x i64> @commute_cmppd_uno(<2 x double>* %a0, <2 x double> %a1) {
;
; AVX512-LABEL: commute_cmppd_uno:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpunordpd (%rdi), %xmm0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: vcmpunordpd (%rdi), %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %a0
%2 = fcmp uno <2 x double> %1, %a1
@ -596,9 +552,7 @@ define <2 x i64> @commute_cmppd_lt(<2 x double>* %a0, <2 x double> %a1) {
; AVX512-LABEL: commute_cmppd_lt:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovapd (%rdi), %xmm1
; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %a0
%2 = fcmp olt <2 x double> %1, %a1
@ -623,9 +577,7 @@ define <2 x i64> @commute_cmppd_le(<2 x double>* %a0, <2 x double> %a1) {
; AVX512-LABEL: commute_cmppd_le:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovapd (%rdi), %xmm1
; AVX512-NEXT: vcmplepd %xmm0, %xmm1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: vcmplepd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %a0
%2 = fcmp ole <2 x double> %1, %a1
@ -647,9 +599,7 @@ define <4 x i64> @commute_cmppd_eq_ymmm(<4 x double>* %a0, <4 x double> %a1) {
;
; AVX512-LABEL: commute_cmppd_eq_ymmm:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpeqpd (%rdi), %ymm0, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %a0
%2 = fcmp oeq <4 x double> %1, %a1
@ -671,9 +621,7 @@ define <4 x i64> @commute_cmppd_ne_ymmm(<4 x double>* %a0, <4 x double> %a1) {
;
; AVX512-LABEL: commute_cmppd_ne_ymmm:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpneqpd (%rdi), %ymm0, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: vcmpneqpd (%rdi), %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %a0
%2 = fcmp une <4 x double> %1, %a1
@ -695,9 +643,7 @@ define <4 x i64> @commute_cmppd_ord_ymmm(<4 x double>* %a0, <4 x double> %a1) {
;
; AVX512-LABEL: commute_cmppd_ord_ymmm:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpordpd (%rdi), %ymm0, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: vcmpordpd (%rdi), %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %a0
%2 = fcmp ord <4 x double> %1, %a1
@ -719,9 +665,7 @@ define <4 x i64> @commute_cmppd_uno_ymmm(<4 x double>* %a0, <4 x double> %a1) {
;
; AVX512-LABEL: commute_cmppd_uno_ymmm:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpunordpd (%rdi), %ymm0, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: vcmpunordpd (%rdi), %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %a0
%2 = fcmp uno <4 x double> %1, %a1
@ -751,9 +695,7 @@ define <4 x i64> @commute_cmppd_ueq_ymmm(<4 x double>* %a0, <4 x double> %a1) {
;
; AVX512-LABEL: commute_cmppd_ueq_ymmm:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpeq_uqpd (%rdi), %ymm0, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: vcmpeq_uqpd (%rdi), %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %a0
%2 = fcmp ueq <4 x double> %1, %a1
@ -783,9 +725,7 @@ define <4 x i64> @commute_cmppd_one_ymmm(<4 x double>* %a0, <4 x double> %a1) {
;
; AVX512-LABEL: commute_cmppd_one_ymmm:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpneq_oqpd (%rdi), %ymm0, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: vcmpneq_oqpd (%rdi), %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %a0
%2 = fcmp one <4 x double> %1, %a1
@ -813,9 +753,7 @@ define <4 x i64> @commute_cmppd_lt_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; AVX512-LABEL: commute_cmppd_lt_ymmm:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovapd (%rdi), %ymm1
; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %a0
%2 = fcmp olt <4 x double> %1, %a1
@ -843,9 +781,7 @@ define <4 x i64> @commute_cmppd_le_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; AVX512-LABEL: commute_cmppd_le_ymmm:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovapd (%rdi), %ymm1
; AVX512-NEXT: vcmplepd %ymm0, %ymm1, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: vcmplepd %ymm0, %ymm1, %ymm0
; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %a0
%2 = fcmp ole <4 x double> %1, %a1

View File

@ -860,10 +860,8 @@ define <2 x i64> @test_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
;
; SKX-LABEL: test_pcmpeqq:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 # sched: [3:1.00]
; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25]
; SKX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0 # sched: [9:1.00]
; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25]
; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_pcmpeqq:

View File

@ -771,10 +771,8 @@ define <2 x i64> @test_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
;
; SKX-LABEL: test_pcmpgtq:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 # sched: [3:1.00]
; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25]
; SKX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0 # sched: [9:1.00]
; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25]
; SKX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_pcmpgtq:

View File

@ -23,10 +23,8 @@ define i64 @test_v2f64_sext(<2 x double> %a0, <2 x double> %a1) {
;
; AVX512-LABEL: test_v2f64_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: retq
@ -62,10 +60,8 @@ define i64 @test_v4f64_sext(<4 x double> %a0, <4 x double> %a1) {
;
; AVX512-LABEL: test_v4f64_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
@ -157,10 +153,8 @@ define i32 @test_v4f32_sext(<4 x float> %a0, <4 x float> %a1) {
;
; AVX512-LABEL: test_v4f32_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
@ -202,10 +196,8 @@ define i32 @test_v8f32_sext(<8 x float> %a0, <8 x float> %a1) {
;
; AVX512-LABEL: test_v8f32_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
@ -298,9 +290,7 @@ define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) {
;
; AVX512-LABEL: test_v2i64_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
@ -352,9 +342,7 @@ define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
;
; AVX512-LABEL: test_v4i64_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
@ -463,9 +451,7 @@ define i32 @test_v4i32_sext(<4 x i32> %a0, <4 x i32> %a1) {
;
; AVX512-LABEL: test_v4i32_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@ -523,9 +509,7 @@ define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) {
;
; AVX512-LABEL: test_v8i32_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
@ -640,8 +624,7 @@ define i16 @test_v8i16_sext(<8 x i16> %a0, <8 x i16> %a1) {
;
; AVX512-LABEL: test_v8i16_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
; AVX512-NEXT: vpmovm2w %k0, %xmm0
; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@ -709,8 +692,7 @@ define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) {
;
; AVX512-LABEL: test_v16i16_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
; AVX512-NEXT: vpmovm2w %k0, %ymm0
; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
@ -838,8 +820,7 @@ define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) {
;
; AVX512-LABEL: test_v16i8_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
; AVX512-NEXT: vpmovm2b %k0, %xmm0
; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@ -913,8 +894,7 @@ define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
;
; AVX512-LABEL: test_v32i8_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %k0
; AVX512-NEXT: vpmovm2b %k0, %ymm0
; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]

View File

@ -23,10 +23,8 @@ define i64 @test_v2f64_sext(<2 x double> %a0, <2 x double> %a1) {
;
; AVX512-LABEL: test_v2f64_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: retq
@ -60,10 +58,8 @@ define i64 @test_v4f64_sext(<4 x double> %a0, <4 x double> %a1) {
;
; AVX512-LABEL: test_v4f64_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
@ -147,10 +143,8 @@ define i32 @test_v4f32_sext(<4 x float> %a0, <4 x float> %a1) {
;
; AVX512-LABEL: test_v4f32_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
@ -188,10 +182,8 @@ define i32 @test_v8f32_sext(<8 x float> %a0, <8 x float> %a1) {
;
; AVX512-LABEL: test_v8f32_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
@ -280,9 +272,7 @@ define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) {
;
; AVX512-LABEL: test_v2i64_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
@ -330,9 +320,7 @@ define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
;
; AVX512-LABEL: test_v4i64_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
@ -431,9 +419,7 @@ define i32 @test_v4i32_sext(<4 x i32> %a0, <4 x i32> %a1) {
;
; AVX512-LABEL: test_v4i32_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@ -485,9 +471,7 @@ define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) {
;
; AVX512-LABEL: test_v8i32_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
@ -592,8 +576,7 @@ define i16 @test_v8i16_sext(<8 x i16> %a0, <8 x i16> %a1) {
;
; AVX512-LABEL: test_v8i16_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
; AVX512-NEXT: vpmovm2w %k0, %xmm0
; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@ -658,8 +641,7 @@ define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) {
;
; AVX512-LABEL: test_v16i16_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
; AVX512-NEXT: vpmovm2w %k0, %ymm0
; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
@ -777,8 +759,7 @@ define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) {
;
; AVX512-LABEL: test_v16i8_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
; AVX512-NEXT: vpmovm2b %k0, %xmm0
; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@ -849,8 +830,7 @@ define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
;
; AVX512-LABEL: test_v32i8_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %k0
; AVX512-NEXT: vpmovm2b %k0, %ymm0
; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]

View File

@ -377,33 +377,14 @@ define <16 x i8> @vselect_packss(<16 x i16> %a0, <16 x i16> %a1, <16 x i8> %a2,
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512NOBW-LABEL: vselect_packss:
; AVX512NOBW: # %bb.0:
; AVX512NOBW-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX512NOBW-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512NOBW-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX512NOBW-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
; AVX512NOBW-NEXT: vzeroupper
; AVX512NOBW-NEXT: retq
;
; AVX512BWNOVL-LABEL: vselect_packss:
; AVX512BWNOVL: # %bb.0:
; AVX512BWNOVL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX512BWNOVL-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512BWNOVL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX512BWNOVL-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
; AVX512BWNOVL-NEXT: vzeroupper
; AVX512BWNOVL-NEXT: retq
;
; AVX512BWVL-LABEL: vselect_packss:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
; AVX512BWVL-NEXT: vpmovm2w %k0, %ymm0
; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512BWVL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX512BWVL-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
; AVX512-LABEL: vselect_packss:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = icmp eq <16 x i16> %a0, %a1
%2 = sext <16 x i1> %1 to <16 x i16>
%3 = shufflevector <16 x i16> %2, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>