
[X86] Fix an issue in the matching for ADDUS.

We were basically assuming that only one operand of the compare could be an ADD node and using that to decide how to swap the operands. But we can have a normal add followed by a saturating add, in which case both operands of the compare are ADD nodes.

This rewrites the canonicalization to be based purely on the condition code rather than on which operand is the ADD.

llvm-svn: 340134
Craig Topper 2018-08-19 04:26:31 +00:00
parent 9610d3de8c
commit 9d5e574a84
2 changed files with 14 additions and 30 deletions
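
For reference, the idiom this combine recognizes is the usual unsigned saturating add written as a compare-and-select. A minimal scalar sketch (illustrative only, not part of the commit; the helper name is invented, and the actual combine operates on vector SETCC/VSELECT nodes):

#include <cstdint>

// Illustrative helper, not LLVM code: x <= x+y ? x+y : ~0  -->  addus x, y
// If the 16-bit sum wraps, it ends up smaller than either operand, so
// clamping to 0xFFFF whenever Sum < x produces the saturated result.
static uint16_t addus_u16(uint16_t x, uint16_t y) {
  uint16_t Sum = x + y;               // wraps modulo 2^16
  return Sum >= x ? Sum : UINT16_MAX; // x <= x+y ? x+y : ~0
}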

View File

@@ -33111,12 +33111,6 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
     SDValue CondLHS = Cond->getOperand(0);
     SDValue CondRHS = Cond->getOperand(1);

-    // Canonicalize ADD to CondRHS to simplify the logic below.
-    if (CondLHS.getOpcode() == ISD::ADD) {
-      std::swap(CondLHS, CondRHS);
-      CC = ISD::getSetCCSwappedOperands(CC);
-    }
-
     // Check if one of the arms of the VSELECT is vector with all bits set.
     // If it's on the left side invert the predicate to simplify logic below.
     SDValue Other;
@@ -33127,10 +33121,7 @@
       Other = LHS;
     }

-    // We can test against either of the addition operands.
-    if (Other.getNode() && Other.getNumOperands() == 2 &&
-        (Other.getOperand(0) == CondLHS ||
-         Other.getOperand(1) == CondLHS)) {
+    if (Other.getNode() && Other.getOpcode() == ISD::ADD) {
       SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);

       auto ADDUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
@@ -33138,9 +33129,17 @@
         return DAG.getNode(X86ISD::ADDUS, DL, Ops[0].getValueType(), Ops);
       };

+      // Canonicalize condition operands.
+      if (CC == ISD::SETUGE) {
+        std::swap(CondLHS, CondRHS);
+        CC = ISD::SETULE;
+      }
+
+      // We can test against either of the addition operands.
       // x <= x+y ? x+y : ~0 --> addus x, y
-      if ((CC == ISD::SETULE) &&
-          Other.getOpcode() == ISD::ADD && Other == CondRHS)
+      // x+y >= x ? x+y : ~0 --> addus x, y
+      if (CC == ISD::SETULE && Other == CondRHS &&
+          (OpLHS == CondLHS || OpRHS == CondLHS))
         return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS },
                                 ADDUSBuilder);
     }
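
The add_addusw test updated below is the shape the old check missed: an ordinary add feeds the saturating-add pattern, so both operands of the compare are ADD nodes and "swap when the LHS is an ADD" can canonicalize the wrong way. A hypothetical scalar analogue (names invented for illustration):

#include <cstdint>

// Illustrative helper, not LLVM code or the test itself.
static uint16_t add_addusw_scalar(uint16_t x, uint16_t y, uint16_t z) {
  uint16_t A = y + z;                 // plain wrapping add (%a in the test IR)
  uint16_t B = x + A;                 // the add we want saturated (%b)
  // Both sides of the compare below come from adds; keying the
  // canonicalization on the condition code (SETUGE -> swap to SETULE)
  // avoids guessing which side holds "the" ADD.
  return B >= A ? B : UINT16_MAX;
}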

View File

@@ -278,34 +278,19 @@ define <8 x i16> @add_addusw(<8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
 ; SSE-LABEL: add_addusw:
 ; SSE:       ## %bb.0:
 ; SSE-NEXT:    paddw %xmm2, %xmm1 ## encoding: [0x66,0x0f,0xfd,0xca]
-; SSE-NEXT:    paddw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xfd,0xc1]
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; SSE-NEXT:    ## encoding: [0x66,0x0f,0x6f,0x15,A,A,A,A]
-; SSE-NEXT:    ## fixup A - offset: 4, value: LCPI8_0, kind: FK_Data_4
-; SSE-NEXT:    pxor %xmm2, %xmm1 ## encoding: [0x66,0x0f,0xef,0xca]
-; SSE-NEXT:    pxor %xmm0, %xmm2 ## encoding: [0x66,0x0f,0xef,0xd0]
-; SSE-NEXT:    pcmpgtw %xmm2, %xmm1 ## encoding: [0x66,0x0f,0x65,0xca]
-; SSE-NEXT:    por %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xeb,0xc1]
+; SSE-NEXT:    paddusw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xdd,0xc1]
 ; SSE-NEXT:    retl ## encoding: [0xc3]
 ;
 ; AVX2-LABEL: add_addusw:
 ; AVX2:       ## %bb.0:
 ; AVX2-NEXT:    vpaddw %xmm2, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0xfd,0xca]
-; AVX2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1]
-; AVX2-NEXT:    vpminuw %xmm0, %xmm1, %xmm2 ## encoding: [0xc4,0xe2,0x71,0x3a,0xd0]
-; AVX2-NEXT:    vpcmpeqw %xmm2, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x75,0xca]
-; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0x76,0xd2]
-; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0xef,0xca]
-; AVX2-NEXT:    vpor %xmm0, %xmm1, %xmm0 ## encoding: [0xc5,0xf1,0xeb,0xc0]
+; AVX2-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdd,0xc1]
 ; AVX2-NEXT:    retl ## encoding: [0xc3]
 ;
 ; SKX-LABEL: add_addusw:
 ; SKX:       ## %bb.0:
 ; SKX-NEXT:    vpaddw %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xca]
-; SKX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1]
-; SKX-NEXT:    vpcmpnleuw %xmm0, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x3e,0xc8,0x06]
-; SKX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
-; SKX-NEXT:    vmovdqu16 %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x6f,0xc1]
+; SKX-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1]
 ; SKX-NEXT:    retl ## encoding: [0xc3]
   %a = add <8 x i16> %y, %z
   %b = add <8 x i16> %x, %a