[ValueTracking] recognize even more variants of smin/smax

Similar to:
https://reviews.llvm.org/rL285499
https://reviews.llvm.org/rL286318

We can't minimally expose this in IR tests because we don't have min/max intrinsics, but the difference is visible in codegen because SelectionDAGBuilder::visitSelect() uses matchSelectPattern().

We're not canonicalizing these patterns in IR (yet), so I don't expect there to be any regressions as noted here:
http://lists.llvm.org/pipermail/llvm-dev/2016-November/106868.html

llvm-svn: 286776
2024-10-18 18:42:46 +02:00 · 2016-11-13 20:04:52 +00:00 · 2016-11-13 20:04:52 +00:00 · a6fc956a6e
commit a6fc956a6e
parent d8ce8d6613
2 changed files with 32 additions and 12 deletions
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -3861,6 +3861,26 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
  if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT)
    return {SPF_UNKNOWN, SPNB_NA, false};

+  // Z = X -nsw Y
+  // (X >s Y) ? 0 : Z ==> (Z >s 0) ? 0 : Z ==> SMIN(Z, 0)
+  // (X <s Y) ? 0 : Z ==> (Z <s 0) ? 0 : Z ==> SMAX(Z, 0)
+  if (match(TrueVal, m_Zero()) &&
+      match(FalseVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS)))) {
+    LHS = TrueVal;
+    RHS = FalseVal;
+    return {Pred == CmpInst::ICMP_SGT ? SPF_SMIN : SPF_SMAX, SPNB_NA, false};
+  }
+
+  // Z = X -nsw Y
+  // (X >s Y) ? Z : 0 ==> (Z >s 0) ? Z : 0 ==> SMAX(Z, 0)
+  // (X <s Y) ? Z : 0 ==> (Z <s 0) ? Z : 0 ==> SMIN(Z, 0)
+  if (match(FalseVal, m_Zero()) &&
+      match(TrueVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS)))) {
+    LHS = TrueVal;
+    RHS = FalseVal;
+    return {Pred == CmpInst::ICMP_SGT ? SPF_SMAX : SPF_SMIN, SPNB_NA, false};
+  }
+
  const APInt *C1;
  if (!match(CmpRHS, m_APInt(C1)))
    return {SPF_UNKNOWN, SPNB_NA, false};
--- a/test/CodeGen/X86/vec_minmax_match.ll
+++ b/test/CodeGen/X86/vec_minmax_match.ll
@@ -37,9 +37,9 @@ define <4 x i32> @smin_vec2(<4 x i32> %x) {
 define <4 x i32> @smin_vec3(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: smin_vec3:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm2
-; CHECK-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vpandn %xmm2, %xmm0, %xmm0
+; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
 ;
  %sub = sub nsw <4 x i32> %x, %y
@@ -53,9 +53,9 @@ define <4 x i32> @smin_vec3(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i32> @smin_vec4(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: smin_vec4:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm2
-; CHECK-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
-; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
+; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
 ;
  %sub = sub nsw <4 x i32> %x, %y
@@ -97,9 +97,9 @@ define <4 x i32> @smax_vec2(<4 x i32> %x) {
 define <4 x i32> @smax_vec3(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: smax_vec3:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm2
-; CHECK-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
-; CHECK-NEXT:    vpandn %xmm2, %xmm0, %xmm0
+; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
 ;
  %sub = sub nsw <4 x i32> %x, %y
@@ -113,9 +113,9 @@ define <4 x i32> @smax_vec3(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i32> @smax_vec4(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: smax_vec4:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm2
-; CHECK-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
+; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
 ;
  %sub = sub nsw <4 x i32> %x, %y