From fc6ca85023ae063d7f75772ff2b175058acacbba Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 20 Jan 2017 22:18:47 +0000 Subject: [PATCH] [ValueTracking] recognize variations of 'clamp' to improve codegen (PR31693) By enhancing value tracking, we allow an existing min/max canonicalization to kick in and improve codegen for several targets that have min/max instructions. Unfortunately, recognizing min/max in value tracking may cause us to hit a hack in InstCombiner::visitICmpInst() more often: http://lists.llvm.org/pipermail/llvm-dev/2017-January/109340.html ...but I'm hoping we can remove that soon. Correctness proofs based on Alive: Name: smaxmin Pre: C1 < C2 %cmp2 = icmp slt i8 %x, C2 %min = select i1 %cmp2, i8 %x, i8 C2 %cmp3 = icmp slt i8 %x, C1 %r = select i1 %cmp3, i8 C1, i8 %min => %cmp2 = icmp slt i8 %x, C2 %min = select i1 %cmp2, i8 %x, i8 C2 %cmp1 = icmp sgt i8 %min, C1 %r = select i1 %cmp1, i8 %min, i8 C1 Name: sminmax Pre: C1 > C2 %cmp2 = icmp sgt i8 %x, C2 %max = select i1 %cmp2, i8 %x, i8 C2 %cmp3 = icmp sgt i8 %x, C1 %r = select i1 %cmp3, i8 C1, i8 %max => %cmp2 = icmp sgt i8 %x, C2 %max = select i1 %cmp2, i8 %x, i8 C2 %cmp1 = icmp slt i8 %max, C1 %r = select i1 %cmp1, i8 %max, i8 C1 ---------------------------------------- Optimization: smaxmin Done: 1 Optimization is correct! ---------------------------------------- Optimization: sminmax Done: 1 Optimization is correct! 
Name: umaxmin Pre: C1 u< C2 %cmp2 = icmp ult i8 %x, C2 %min = select i1 %cmp2, i8 %x, i8 C2 %cmp3 = icmp ult i8 %x, C1 %r = select i1 %cmp3, i8 C1, i8 %min => %cmp2 = icmp ult i8 %x, C2 %min = select i1 %cmp2, i8 %x, i8 C2 %cmp1 = icmp ugt i8 %min, C1 %r = select i1 %cmp1, i8 %min, i8 C1 Name: uminmax Pre: C1 u> C2 %cmp2 = icmp ugt i8 %x, C2 %max = select i1 %cmp2, i8 %x, i8 C2 %cmp3 = icmp ugt i8 %x, C1 %r = select i1 %cmp3, i8 C1, i8 %max => %cmp2 = icmp ugt i8 %x, C2 %max = select i1 %cmp2, i8 %x, i8 C2 %cmp1 = icmp ult i8 %max, C1 %r = select i1 %cmp1, i8 %max, i8 C1 ---------------------------------------- Optimization: umaxmin Done: 1 Optimization is correct! ---------------------------------------- Optimization: uminmax Done: 1 Optimization is correct! llvm-svn: 292660 --- lib/Analysis/ValueTracking.cpp | 40 +++++++++++++++++++++- test/CodeGen/X86/vec_minmax_match.ll | 26 +++++--------- test/Transforms/InstCombine/minmax-fold.ll | 16 ++++----- 3 files changed, 55 insertions(+), 27 deletions(-) diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 00536235287..4f18cc8b117 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -3918,6 +3918,45 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred, Value *CmpLHS, Value *CmpRHS, Value *TrueVal, Value *FalseVal, Value *&LHS, Value *&RHS) { + // Recognize variations of: + // CLAMP(v,l,h) ==> ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v))) + const APInt *C1; + if (CmpRHS == TrueVal && match(CmpRHS, m_APInt(C1))) { + const APInt *C2; + + // (X <s C1) ? C1 : SMIN(X, C2) ==> SMAX(SMIN(X, C2), C1) + if (match(FalseVal, m_SMin(m_Specific(CmpLHS), m_APInt(C2))) && + C1->slt(*C2) && Pred == CmpInst::ICMP_SLT) { + LHS = TrueVal; + RHS = FalseVal; + return {SPF_SMAX, SPNB_NA, false}; + } + + // (X >s C1) ? 
C1 : SMAX(X, C2) ==> SMIN(SMAX(X, C2), C1) + if (match(FalseVal, m_SMax(m_Specific(CmpLHS), m_APInt(C2))) && + C1->sgt(*C2) && Pred == CmpInst::ICMP_SGT) { + LHS = TrueVal; + RHS = FalseVal; + return {SPF_SMIN, SPNB_NA, false}; + } + + // (X <u C1) ? C1 : UMIN(X, C2) ==> UMAX(UMIN(X, C2), C1) + if (match(FalseVal, m_UMin(m_Specific(CmpLHS), m_APInt(C2))) && + C1->ult(*C2) && Pred == CmpInst::ICMP_ULT) { + LHS = TrueVal; + RHS = FalseVal; + return {SPF_UMAX, SPNB_NA, false}; + } + + // (X >u C1) ? C1 : UMAX(X, C2) ==> UMIN(UMAX(X, C2), C1) + if (match(FalseVal, m_UMax(m_Specific(CmpLHS), m_APInt(C2))) && + C1->ugt(*C2) && Pred == CmpInst::ICMP_UGT) { + LHS = TrueVal; + RHS = FalseVal; + return {SPF_UMIN, SPNB_NA, false}; + } + } + if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT) return {SPF_UNKNOWN, SPNB_NA, false}; @@ -3941,7 +3980,6 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred, return {Pred == CmpInst::ICMP_SGT ? SPF_SMAX : SPF_SMIN, SPNB_NA, false}; } - const APInt *C1; if (!match(CmpRHS, m_APInt(C1))) return {SPF_UNKNOWN, SPNB_NA, false}; diff --git a/test/CodeGen/X86/vec_minmax_match.ll b/test/CodeGen/X86/vec_minmax_match.ll index 6644d5dc84b..98f77912779 100644 --- a/test/CodeGen/X86/vec_minmax_match.ll +++ b/test/CodeGen/X86/vec_minmax_match.ll @@ -1,5 +1,4 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s ; These are actually tests of ValueTracking, and so may have test coverage in InstCombine or other @@ -165,10 +164,8 @@ define <4 x i32> @umin_vec2(<4 x i32> %x) { define <4 x i32> @clamp_signed1(<4 x i32> %x) { ; CHECK-LABEL: clamp_signed1: ; CHECK: # BB#0: -; CHECK-NEXT: vpminsd {{.*}}(%rip), %xmm0, %xmm1 -; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15] -; CHECK-NEXT: vpcmpgtd %xmm0, %xmm2, %xmm0 -; CHECK-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vpminsd 
{{.*}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: vpmaxsd {{.*}}(%rip), %xmm0, %xmm0 ; CHECK-NEXT: retq %cmp2 = icmp slt <4 x i32> %x, %min = select <4 x i1> %cmp2, <4 x i32> %x, <4 x i32> @@ -182,10 +179,8 @@ define <4 x i32> @clamp_signed1(<4 x i32> %x) { define <4 x i32> @clamp_signed2(<4 x i32> %x) { ; CHECK-LABEL: clamp_signed2: ; CHECK: # BB#0: -; CHECK-NEXT: vpmaxsd {{.*}}(%rip), %xmm0, %xmm1 -; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255] -; CHECK-NEXT: vpcmpgtd %xmm2, %xmm0, %xmm0 -; CHECK-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vpmaxsd {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: vpminsd {{.*}}(%rip), %xmm0, %xmm0 ; CHECK-NEXT: retq %cmp2 = icmp sgt <4 x i32> %x, %max = select <4 x i1> %cmp2, <4 x i32> %x, <4 x i32> @@ -199,11 +194,8 @@ define <4 x i32> @clamp_signed2(<4 x i32> %x) { define <4 x i32> @clamp_unsigned1(<4 x i32> %x) { ; CHECK-LABEL: clamp_unsigned1: ; CHECK: # BB#0: -; CHECK-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 -; CHECK-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 -; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [2147483663,2147483663,2147483663,2147483663] -; CHECK-NEXT: vpcmpgtd %xmm0, %xmm2, %xmm0 -; CHECK-NEXT: vblendvps %xmm0, {{.*}}(%rip), %xmm1, %xmm0 +; CHECK-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0 ; CHECK-NEXT: retq %cmp2 = icmp ult <4 x i32> %x, %min = select <4 x i1> %cmp2, <4 x i32> %x, <4 x i32> @@ -217,10 +209,8 @@ define <4 x i32> @clamp_unsigned1(<4 x i32> %x) { define <4 x i32> @clamp_unsigned2(<4 x i32> %x) { ; CHECK-LABEL: clamp_unsigned2: ; CHECK: # BB#0: -; CHECK-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm1 -; CHECK-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 -; CHECK-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 -; CHECK-NEXT: vblendvps %xmm0, {{.*}}(%rip), %xmm1, %xmm0 +; CHECK-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0 ; CHECK-NEXT: retq %cmp2 = icmp ugt <4 x i32> %x, %max = select <4 x i1> %cmp2, <4 x i32> %x, <4 x i32> diff 
--git a/test/Transforms/InstCombine/minmax-fold.ll b/test/Transforms/InstCombine/minmax-fold.ll index ec838e25f31..adb99c283c2 100644 --- a/test/Transforms/InstCombine/minmax-fold.ll +++ b/test/Transforms/InstCombine/minmax-fold.ll @@ -348,8 +348,8 @@ define i32 @clamp_signed1(i32 %x) { ; CHECK-LABEL: @clamp_signed1( ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 %x, 255 ; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], i32 %x, i32 255 -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 %x, 15 -; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], i32 15, i32 [[MIN]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[MIN]], 15 +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 [[MIN]], i32 15 ; CHECK-NEXT: ret i32 [[R]] ; %cmp2 = icmp slt i32 %x, 255 @@ -365,8 +365,8 @@ define i32 @clamp_signed2(i32 %x) { ; CHECK-LABEL: @clamp_signed2( ; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 %x, 15 ; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], i32 %x, i32 15 -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 %x, 255 -; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], i32 255, i32 [[MAX]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[MAX]], 255 +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 [[MAX]], i32 255 ; CHECK-NEXT: ret i32 [[R]] ; %cmp2 = icmp sgt i32 %x, 15 @@ -382,8 +382,8 @@ define i32 @clamp_unsigned1(i32 %x) { ; CHECK-LABEL: @clamp_unsigned1( ; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 %x, 255 ; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], i32 %x, i32 255 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 %x, 15 -; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], i32 15, i32 [[MIN]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[MIN]], 15 +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 [[MIN]], i32 15 ; CHECK-NEXT: ret i32 [[R]] ; %cmp2 = icmp ult i32 %x, 255 @@ -399,8 +399,8 @@ define i32 @clamp_unsigned2(i32 %x) { ; CHECK-LABEL: @clamp_unsigned2( ; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 %x, 15 ; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], i32 %x, i32 15 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 %x, 255 -; 
CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], i32 255, i32 [[MAX]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[MAX]], 255 +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 [[MAX]], i32 255 ; CHECK-NEXT: ret i32 [[R]] ; %cmp2 = icmp ugt i32 %x, 15