1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 13:11:39 +01:00

[DAG] enhance computeKnownBits to handle SHL with vector splat constant

Also, use APInt for the shift amount to avoid crashing on vector types whose elements are wider than 64 bits (i.e. larger than vNi64).

llvm-svn: 284874
This commit is contained in:
Sanjay Patel 2016-10-21 20:16:27 +00:00
parent fe92dce0bb
commit 5dbf39179f
3 changed files with 12 additions and 20 deletions

View File

@ -2144,23 +2144,21 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
break;
case ISD::SHL:
// (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
unsigned ShAmt = SA->getZExtValue();
if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) {
// If the shift count is an invalid immediate, don't do anything.
if (ShAmt >= BitWidth)
APInt ShAmt = SA->getAPIntValue();
if (ShAmt.uge(BitWidth))
break;
computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
KnownZero <<= ShAmt;
KnownOne <<= ShAmt;
computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth + 1);
KnownZero = KnownZero << ShAmt;
KnownOne = KnownOne << ShAmt;
// low bits known zero.
KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt);
KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt.getZExtValue());
}
break;
case ISD::SRL:
// (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
// FIXME: Reuse isConstOrConstSplat + APInt from above.
if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
unsigned ShAmt = SA->getZExtValue();
@ -2177,6 +2175,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
}
break;
case ISD::SRA:
// FIXME: Reuse isConstOrConstSplat + APInt from above.
if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
unsigned ShAmt = SA->getZExtValue();

View File

@ -61,16 +61,12 @@ define <4 x i32> @combine_vec_shl_by_zero(<4 x i32> %x) {
define <4 x i32> @combine_vec_shl_known_zero0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_shl_known_zero0:
; SSE: # BB#0:
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
; SSE-NEXT: pslld $16, %xmm0
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_known_zero0:
; AVX: # BB#0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
; AVX-NEXT: vpslld $16, %xmm0, %xmm0
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = and <4 x i32> %x, <i32 4294901760, i32 4294901760, i32 4294901760, i32 4294901760>
%2 = shl <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>

View File

@ -35,10 +35,7 @@ define i8 @negate_zero_or_minsigned_nsw(i8 %x) {
define <4 x i32> @negate_zero_or_minsigned_nsw_vec(<4 x i32> %x) {
; CHECK-LABEL: negate_zero_or_minsigned_nsw_vec:
; CHECK: # BB#0:
; CHECK-NEXT: pslld $31, %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm1
; CHECK-NEXT: psubd %xmm0, %xmm1
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: retq
;
%signbit = shl <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>