1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

[InstCombine] Determine demanded and known bits for funnel shifts

Support funnel shifts in InstCombine demanded bits simplification.
If the shift amount is constant, we can determine both the demanded
bits of the operands, as well as the known bits of the result.

If one of the operands has no demanded bits, it will be replaced
by undef and the funnel shift will be simplified into a simple shift
due to the simplifications added in D54778.

Differential Revision: https://reviews.llvm.org/D54869

llvm-svn: 347515
This commit is contained in:
Nikita Popov 2018-11-24 19:00:45 +00:00
parent 0be89eeb01
commit a2de3621a0
2 changed files with 58 additions and 19 deletions

View File

@ -690,6 +690,30 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// TODO: Could compute known zero/one bits based on the input.
break;
}
// Funnel shifts: when the shift amount (operand 2) is a constant, derive the
// demanded bits of the two data operands from the demanded bits of the
// result, and derive the known bits of the result from the operands.
case Intrinsic::fshr:
case Intrinsic::fshl: {
const APInt *SA;
// Only shift amounts that m_APInt can match are handled; otherwise break out
// of this case without attempting any simplification here.
if (!match(I->getOperand(2), m_APInt(SA)))
break;
// Normalize to funnel shift left. APInt shifts of BitWidth are well-
// defined, so no need to special-case zero shifts here.
// The constant amount is first reduced modulo BitWidth.
uint64_t ShiftAmt = SA->urem(BitWidth);
// A right funnel shift is treated as a left funnel shift by
// BitWidth - ShiftAmt (which is BitWidth itself when ShiftAmt is zero).
if (II->getIntrinsicID() == Intrinsic::fshr)
ShiftAmt = BitWidth - ShiftAmt;
// Operand 0 ends up shifted left by ShiftAmt in the result, so the bits
// demanded from it are the demanded result bits shifted right by ShiftAmt.
APInt DemandedMaskLHS(DemandedMask.lshr(ShiftAmt));
// Operand 1 ends up shifted right by BitWidth - ShiftAmt in the result, so
// its demanded bits are the demanded result bits shifted left by that amount.
APInt DemandedMaskRHS(DemandedMask.shl(BitWidth - ShiftAmt));
// If simplifying either operand against its mask changed something, report
// the instruction itself as changed.
if (SimplifyDemandedBits(I, 0, DemandedMaskLHS, LHSKnown, Depth + 1) ||
SimplifyDemandedBits(I, 1, DemandedMaskRHS, RHSKnown, Depth + 1))
return I;
// Known bits of the result: operand 0's known bits moved up by ShiftAmt,
// combined with operand 1's known bits moved down by BitWidth - ShiftAmt.
Known.Zero = LHSKnown.Zero.shl(ShiftAmt) |
RHSKnown.Zero.lshr(BitWidth - ShiftAmt);
Known.One = LHSKnown.One.shl(ShiftAmt) |
RHSKnown.One.lshr(BitWidth - ShiftAmt);
break;
}
case Intrinsic::x86_mmx_pmovmskb:
case Intrinsic::x86_sse_movmsk_ps:
case Intrinsic::x86_sse2_movmsk_pd:

View File

@ -255,7 +255,7 @@ define <2 x i32> @fshr_op1_zero_vec(<2 x i32> %x) {
define i32 @fshl_only_op0_demanded(i32 %x, i32 %y) {
; CHECK-LABEL: @fshl_only_op0_demanded(
; CHECK-NEXT: [[Z:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 7)
; CHECK-NEXT: [[Z:%.*]] = shl i32 [[X:%.*]], 7
; CHECK-NEXT: [[R:%.*]] = and i32 [[Z]], 128
; CHECK-NEXT: ret i32 [[R]]
;
@ -266,7 +266,7 @@ define i32 @fshl_only_op0_demanded(i32 %x, i32 %y) {
define i32 @fshl_only_op1_demanded(i32 %x, i32 %y) {
; CHECK-LABEL: @fshl_only_op1_demanded(
; CHECK-NEXT: [[Z:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 7)
; CHECK-NEXT: [[Z:%.*]] = lshr i32 [[Y:%.*]], 25
; CHECK-NEXT: [[R:%.*]] = and i32 [[Z]], 63
; CHECK-NEXT: ret i32 [[R]]
;
@ -275,9 +275,9 @@ define i32 @fshl_only_op1_demanded(i32 %x, i32 %y) {
ret i32 %r
}
define i33 @fshr_only_op0_demanded(i33 %x, i33 %y) {
; CHECK-LABEL: @fshr_only_op0_demanded(
; CHECK-NEXT: [[Z:%.*]] = call i33 @llvm.fshr.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 7)
define i33 @fshr_only_op1_demanded(i33 %x, i33 %y) {
; CHECK-LABEL: @fshr_only_op1_demanded(
; CHECK-NEXT: [[Z:%.*]] = lshr i33 [[Y:%.*]], 7
; CHECK-NEXT: [[R:%.*]] = and i33 [[Z]], 12392
; CHECK-NEXT: ret i33 [[R]]
;
@ -286,10 +286,10 @@ define i33 @fshr_only_op0_demanded(i33 %x, i33 %y) {
ret i33 %r
}
define i33 @fshr_only_op1_demanded(i33 %x, i33 %y) {
; CHECK-LABEL: @fshr_only_op1_demanded(
; CHECK-NEXT: [[Z:%.*]] = call i33 @llvm.fshr.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 7)
; CHECK-NEXT: [[R:%.*]] = lshr i33 [[Z]], 30
define i33 @fshr_only_op0_demanded(i33 %x, i33 %y) {
; CHECK-LABEL: @fshr_only_op0_demanded(
; CHECK-NEXT: [[TMP1:%.*]] = lshr i33 [[X:%.*]], 4
; CHECK-NEXT: [[R:%.*]] = and i33 [[TMP1]], 7
; CHECK-NEXT: ret i33 [[R]]
;
%z = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 7)
@ -297,6 +297,29 @@ define i33 @fshr_only_op1_demanded(i33 %x, i33 %y) {
ret i33 %r
}
define <2 x i31> @fshl_only_op1_demanded_vec_splat(<2 x i31> %x, <2 x i31> %y) {
; CHECK-LABEL: @fshl_only_op1_demanded_vec_splat(
; CHECK-NEXT: [[Z:%.*]] = lshr <2 x i31> [[Y:%.*]], <i31 24, i31 24>
; CHECK-NEXT: [[R:%.*]] = and <2 x i31> [[Z]], <i31 63, i31 31>
; CHECK-NEXT: ret <2 x i31> [[R]]
;
%z = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> <i31 7, i31 7>)
%r = and <2 x i31> %z, <i31 63, i31 31>
ret <2 x i31> %r
}
; The shift modulo bitwidth is the same for all vector elements, but this is not simplified yet.
define <2 x i31> @fshl_only_op1_demanded_vec_nonsplat(<2 x i31> %x, <2 x i31> %y) {
; CHECK-LABEL: @fshl_only_op1_demanded_vec_nonsplat(
; CHECK-NEXT: [[Z:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[Y:%.*]], <2 x i31> <i31 7, i31 38>)
; CHECK-NEXT: [[R:%.*]] = and <2 x i31> [[Z]], <i31 63, i31 31>
; CHECK-NEXT: ret <2 x i31> [[R]]
;
%z = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> <i31 7, i31 38>)
%r = and <2 x i31> %z, <i31 63, i31 31>
ret <2 x i31> %r
}
; Demand bits from both operands -- cannot simplify.
define i32 @fshl_both_ops_demanded(i32 %x, i32 %y) {
@ -325,11 +348,7 @@ define i33 @fshr_both_ops_demanded(i33 %x, i33 %y) {
define i32 @fshl_known_bits(i32 %x, i32 %y) {
; CHECK-LABEL: @fshl_known_bits(
; CHECK-NEXT: [[X2:%.*]] = or i32 [[X:%.*]], 1
; CHECK-NEXT: [[Y2:%.*]] = lshr i32 [[Y:%.*]], 1
; CHECK-NEXT: [[Z:%.*]] = call i32 @llvm.fshl.i32(i32 [[X2]], i32 [[Y2]], i32 7)
; CHECK-NEXT: [[R:%.*]] = and i32 [[Z]], 192
; CHECK-NEXT: ret i32 [[R]]
; CHECK-NEXT: ret i32 128
;
%x2 = or i32 %x, 1 ; lo bit set
%y2 = lshr i32 %y, 1 ; hi bit clear
@ -340,11 +359,7 @@ define i32 @fshl_known_bits(i32 %x, i32 %y) {
define i33 @fshr_known_bits(i33 %x, i33 %y) {
; CHECK-LABEL: @fshr_known_bits(
; CHECK-NEXT: [[X2:%.*]] = or i33 [[X:%.*]], 1
; CHECK-NEXT: [[Y2:%.*]] = lshr i33 [[Y:%.*]], 1
; CHECK-NEXT: [[Z:%.*]] = call i33 @llvm.fshr.i33(i33 [[X2]], i33 [[Y2]], i33 26)
; CHECK-NEXT: [[R:%.*]] = and i33 [[Z]], 192
; CHECK-NEXT: ret i33 [[R]]
; CHECK-NEXT: ret i33 128
;
%x2 = or i33 %x, 1 ; lo bit set
%y2 = lshr i33 %y, 1 ; hi bit clear