1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

[SelectionDAG] ComputeKnownBits - merge getValidMinimumShiftAmountConstant() and generic ISD::SRL handling.

As mentioned by @nikic on rGef5debac4302 (although that was just about SHL), we can merge the guaranteed top zero bits from the shifted value, and then, if a min shift amount is known, zero out the top bits as well.

SHL tests / handling will be added in a follow up patch.
This commit is contained in:
Simon Pilgrim 2020-01-14 11:20:09 +00:00
parent bb2458b788
commit f2b1ef955e
3 changed files with 12 additions and 14 deletions

View File

@ -2885,23 +2885,25 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
break;
case ISD::SRL:
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) {
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
unsigned Shift = ShAmt->getZExtValue();
Known.Zero.lshrInPlace(Shift);
Known.One.lshrInPlace(Shift);
// High bits are known zero.
Known.Zero.setHighBits(Shift);
} else if (const APInt *ShMinAmt =
getValidMinimumShiftAmountConstant(Op, DemandedElts)) {
// Minimum shift high bits are known zero.
Known.Zero.setHighBits(ShMinAmt->getZExtValue());
} else {
// No matter the shift amount, the leading zeros will stay zero.
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known.Zero = APInt::getHighBitsSet(BitWidth, Known.countMinLeadingZeros());
Known.One.clearAllBits();
break;
}
// No matter the shift amount, the leading zeros will stay zero.
Known.Zero = APInt::getHighBitsSet(BitWidth, Known.countMinLeadingZeros());
Known.One.clearAllBits();
// Minimum shift high bits are known zero.
if (const APInt *ShMinAmt =
getValidMinimumShiftAmountConstant(Op, DemandedElts))
Known.Zero.setHighBits(ShMinAmt->getZExtValue());
break;
case ISD::SRA:
if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) {

View File

@ -405,7 +405,6 @@ define <16 x i8> @test_divconstant_16i8(<16 x i8> %a) nounwind {
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0]
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm2, %zmm1
; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX512BW-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
; AVX512BW-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1
@ -923,7 +922,6 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind {
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0]
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm2, %zmm1
; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX512BW-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
; AVX512BW-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1

View File

@ -345,7 +345,6 @@ define <32 x i8> @test_divconstant_32i8(<32 x i8> %a) nounwind {
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BW-NEXT: vpsrlvw {{.*}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT: vpmullw {{.*}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1
@ -793,7 +792,6 @@ define <32 x i8> @test_remconstant_32i8(<32 x i8> %a) nounwind {
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BW-NEXT: vpsrlvw {{.*}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT: vpmullw {{.*}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1