From 7eb6729e53439f12b0fcc3431b2374551410a303 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 5 Feb 2018 23:57:01 +0000 Subject: [PATCH] [X86] Relax restrictions on what setcc condition codes can be folded with a sext when AVX512 is enabled. We now allow all signed comparisons and not equal. The complement that needs to be added for this is no worse than the extend. And the vector output forms of pcmpeq/pcmpgt have better latency than the k-register version on SKX. llvm-svn: 324294 --- lib/Target/X86/X86ISelLowering.cpp | 3 +-- test/CodeGen/X86/avx512-ext.ll | 4 ++-- test/CodeGen/X86/avx512-schedule.ll | 8 ++++---- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5df78fe3e5c..9e5cb113ac6 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -36118,8 +36118,7 @@ static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG, // Don't fold if the condition code can't be handled by PCMPEQ/PCMPGT since // that's the only integer compares with we have. ISD::CondCode CC = cast<CondCodeSDNode>(N0->getOperand(2))->get(); - if (ISD::isUnsignedIntSetCC(CC) || CC == ISD::SETLE || CC == ISD::SETGE || - CC == ISD::SETNE) + if (ISD::isUnsignedIntSetCC(CC)) return SDValue(); // Only do this combine if the extension will be fully consumed by the setcc. 
diff --git a/test/CodeGen/X86/avx512-ext.ll b/test/CodeGen/X86/avx512-ext.ll index 130bba556a7..64db862132d 100644 --- a/test/CodeGen/X86/avx512-ext.ll +++ b/test/CodeGen/X86/avx512-ext.ll @@ -1415,8 +1415,8 @@ define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind { ; ; SKX-LABEL: sext_8i1_8i32: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpled %ymm0, %ymm1, %k0 -; SKX-NEXT: vpmovm2d %k0, %ymm0 +; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 +; SKX-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 ; SKX-NEXT: retq %x = icmp slt <8 x i32> %a1, %a2 %x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> diff --git a/test/CodeGen/X86/avx512-schedule.ll b/test/CodeGen/X86/avx512-schedule.ll index 886e4f2fbcb..d83c7cd96c3 100755 --- a/test/CodeGen/X86/avx512-schedule.ll +++ b/test/CodeGen/X86/avx512-schedule.ll @@ -4365,14 +4365,14 @@ define i8 @trunc_8i16_to_8i1(<8 x i16> %a) { define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind { ; GENERIC-LABEL: sext_8i1_8i32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpcmpled %ymm0, %ymm1, %k0 # sched: [3:1.00] -; GENERIC-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.33] +; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_8i1_8i32: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpled %ymm0, %ymm1, %k0 # sched: [3:1.00] -; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25] +; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] %x = icmp slt <8 x i32> %a1, %a2 %x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>