
[X86] Fix an issue in the matching for ADDUS.

We were basically assuming that only one operand of the compare could be an ADD node and using that to decide how to swap the operands. But we can have a normal add followed by a saturating add, in which case both operands of the compare are ADD nodes.

This rewrites the canonicalization to be based purely on the condition code rather than on which operand is the ADD.

llvm-svn: 340134
Craig Topper 2018-08-19 04:26:31 +00:00
parent 9610d3de8c
commit 9d5e574a84
2 changed files with 14 additions and 30 deletions
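
For reference, the idiom this combine recognizes is the usual unsigned saturating add written as a compare-and-select. A minimal scalar sketch (illustrative only, not part of the commit; the helper name is invented, and the actual combine operates on vector SETCC/VSELECT nodes):

#include <cstdint>

// Illustrative helper, not LLVM code: x <= x+y ? x+y : ~0  -->  addus x, y
// If the 16-bit sum wraps, it ends up smaller than either operand, so
// clamping to 0xFFFF whenever Sum < x produces the saturated result.
static uint16_t addus_u16(uint16_t x, uint16_t y) {
  uint16_t Sum = x + y;               // wraps modulo 2^16
  return Sum >= x ? Sum : UINT16_MAX; // x <= x+y ? x+y : ~0
}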

View File

@@ -33111,12 +33111,6 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
     SDValue CondLHS = Cond->getOperand(0);
     SDValue CondRHS = Cond->getOperand(1);

-    // Canonicalize ADD to CondRHS to simplify the logic below.
-    if (CondLHS.getOpcode() == ISD::ADD) {
-      std::swap(CondLHS, CondRHS);
-      CC = ISD::getSetCCSwappedOperands(CC);
-    }
-
     // Check if one of the arms of the VSELECT is vector with all bits set.
     // If it's on the left side invert the predicate to simplify logic below.
     SDValue Other;
@@ -33127,10 +33121,7 @@
       Other = LHS;
     }

-    // We can test against either of the addition operands.
-    if (Other.getNode() && Other.getNumOperands() == 2 &&
-        (Other.getOperand(0) == CondLHS ||
-         Other.getOperand(1) == CondLHS)) {
+    if (Other.getNode() && Other.getOpcode() == ISD::ADD) {
       SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);

       auto ADDUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
@@ -33138,9 +33129,17 @@
         return DAG.getNode(X86ISD::ADDUS, DL, Ops[0].getValueType(), Ops);
       };

+      // Canonicalize condition operands.
+      if (CC == ISD::SETUGE) {
+        std::swap(CondLHS, CondRHS);
+        CC = ISD::SETULE;
+      }
+
+      // We can test against either of the addition operands.
       // x <= x+y ? x+y : ~0 --> addus x, y
-      if ((CC == ISD::SETULE) &&
-          Other.getOpcode() == ISD::ADD && Other == CondRHS)
+      // x+y >= x ? x+y : ~0 --> addus x, y
+      if (CC == ISD::SETULE && Other == CondRHS &&
+          (OpLHS == CondLHS || OpRHS == CondLHS))
         return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS },
                                 ADDUSBuilder);
     }
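
The add_addusw test updated below is the shape the old check missed: an ordinary add feeds the saturating-add pattern, so both operands of the compare are ADD nodes and "swap when the LHS is an ADD" can canonicalize the wrong way. A hypothetical scalar analogue (names invented for illustration):

#include <cstdint>

// Illustrative helper, not LLVM code or the test itself.
static uint16_t add_addusw_scalar(uint16_t x, uint16_t y, uint16_t z) {
  uint16_t A = y + z;                 // plain wrapping add (%a in the test IR)
  uint16_t B = x + A;                 // the add we want saturated (%b)
  // Both sides of the compare below come from adds; keying the
  // canonicalization on the condition code (SETUGE -> swap to SETULE)
  // avoids guessing which side holds "the" ADD.
  return B >= A ? B : UINT16_MAX;
}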

View File

@@ -278,34 +278,19 @@ define <8 x i16> @add_addusw(<8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
 ; SSE-LABEL: add_addusw:
 ; SSE:       ## %bb.0:
 ; SSE-NEXT:    paddw %xmm2, %xmm1 ## encoding: [0x66,0x0f,0xfd,0xca]
-; SSE-NEXT:    paddw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xfd,0xc1]
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; SSE-NEXT:    ## encoding: [0x66,0x0f,0x6f,0x15,A,A,A,A]
-; SSE-NEXT:    ## fixup A - offset: 4, value: LCPI8_0, kind: FK_Data_4
-; SSE-NEXT:    pxor %xmm2, %xmm1 ## encoding: [0x66,0x0f,0xef,0xca]
-; SSE-NEXT:    pxor %xmm0, %xmm2 ## encoding: [0x66,0x0f,0xef,0xd0]
-; SSE-NEXT:    pcmpgtw %xmm2, %xmm1 ## encoding: [0x66,0x0f,0x65,0xca]
-; SSE-NEXT:    por %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xeb,0xc1]
+; SSE-NEXT:    paddusw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xdd,0xc1]
 ; SSE-NEXT:    retl ## encoding: [0xc3]
 ;
 ; AVX2-LABEL: add_addusw:
 ; AVX2:       ## %bb.0:
 ; AVX2-NEXT:    vpaddw %xmm2, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0xfd,0xca]
-; AVX2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1]
-; AVX2-NEXT:    vpminuw %xmm0, %xmm1, %xmm2 ## encoding: [0xc4,0xe2,0x71,0x3a,0xd0]
-; AVX2-NEXT:    vpcmpeqw %xmm2, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x75,0xca]
-; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0x76,0xd2]
-; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0xef,0xca]
-; AVX2-NEXT:    vpor %xmm0, %xmm1, %xmm0 ## encoding: [0xc5,0xf1,0xeb,0xc0]
+; AVX2-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdd,0xc1]
 ; AVX2-NEXT:    retl ## encoding: [0xc3]
 ;
 ; SKX-LABEL: add_addusw:
 ; SKX:       ## %bb.0:
 ; SKX-NEXT:    vpaddw %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xca]
-; SKX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1]
-; SKX-NEXT:    vpcmpnleuw %xmm0, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x3e,0xc8,0x06]
-; SKX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
-; SKX-NEXT:    vmovdqu16 %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x6f,0xc1]
+; SKX-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1]
 ; SKX-NEXT:    retl ## encoding: [0xc3]
   %a = add <8 x i16> %y, %z
   %b = add <8 x i16> %x, %a