mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-18 10:32:48 +02:00
[InstCombine] Simplify cttz/ctlz + icmp ugt/ult
Follow-up to D55745, this time handling comparisons with ugt and ult predicates (which are the canonical forms for non-equality predicates). For ctlz we can convert into a simple icmp; for cttz we can convert into a mask check. Differential Revision: https://reviews.llvm.org/D56355 llvm-svn: 351645
This commit is contained in:
parent
6ff896cb84
commit
56baabf50e
@ -2610,8 +2610,9 @@ Instruction *InstCombiner::foldICmpInstWithConstant(ICmpInst &Cmp) {
|
||||
return I;
|
||||
}
|
||||
|
||||
if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, *C))
|
||||
return I;
|
||||
if (auto *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0)))
|
||||
if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, II, *C))
|
||||
return I;
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
@ -2755,14 +2756,10 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C.
|
||||
Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
|
||||
const APInt &C) {
|
||||
IntrinsicInst *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0));
|
||||
if (!II || !Cmp.isEquality())
|
||||
return nullptr;
|
||||
|
||||
// Handle icmp {eq|ne} <intrinsic>, Constant.
|
||||
/// Fold an equality icmp with LLVM intrinsic and constant operand.
|
||||
Instruction *InstCombiner::foldICmpEqIntrinsicWithConstant(ICmpInst &Cmp,
|
||||
IntrinsicInst *II,
|
||||
const APInt &C) {
|
||||
Type *Ty = II->getType();
|
||||
unsigned BitWidth = C.getBitWidth();
|
||||
switch (II->getIntrinsicID()) {
|
||||
@ -2822,6 +2819,65 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C.
|
||||
Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
|
||||
IntrinsicInst *II,
|
||||
const APInt &C) {
|
||||
if (Cmp.isEquality())
|
||||
return foldICmpEqIntrinsicWithConstant(Cmp, II, C);
|
||||
|
||||
Type *Ty = II->getType();
|
||||
unsigned BitWidth = C.getBitWidth();
|
||||
switch (II->getIntrinsicID()) {
|
||||
case Intrinsic::ctlz: {
|
||||
// ctlz(0bXXXXXXXX) > 3 -> 0bXXXXXXXX < 0b00010000
|
||||
if (Cmp.getPredicate() == ICmpInst::ICMP_UGT && C.ult(BitWidth)) {
|
||||
unsigned Num = C.getLimitedValue();
|
||||
APInt Limit = APInt::getOneBitSet(BitWidth, BitWidth - Num - 1);
|
||||
return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_ULT,
|
||||
II->getArgOperand(0), ConstantInt::get(Ty, Limit));
|
||||
}
|
||||
|
||||
// ctlz(0bXXXXXXXX) < 3 -> 0bXXXXXXXX > 0b00011111
|
||||
if (Cmp.getPredicate() == ICmpInst::ICMP_ULT &&
|
||||
C.uge(1) && C.ule(BitWidth)) {
|
||||
unsigned Num = C.getLimitedValue();
|
||||
APInt Limit = APInt::getLowBitsSet(BitWidth, BitWidth - Num);
|
||||
return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_UGT,
|
||||
II->getArgOperand(0), ConstantInt::get(Ty, Limit));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Intrinsic::cttz: {
|
||||
// Limit to one use to ensure we don't increase instruction count.
|
||||
if (!II->hasOneUse())
|
||||
return nullptr;
|
||||
|
||||
// cttz(0bXXXXXXXX) > 3 -> 0bXXXXXXXX & 0b00001111 == 0
|
||||
if (Cmp.getPredicate() == ICmpInst::ICMP_UGT && C.ult(BitWidth)) {
|
||||
APInt Mask = APInt::getLowBitsSet(BitWidth, C.getLimitedValue() + 1);
|
||||
return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ,
|
||||
Builder.CreateAnd(II->getArgOperand(0), Mask),
|
||||
ConstantInt::getNullValue(Ty));
|
||||
}
|
||||
|
||||
// cttz(0bXXXXXXXX) < 3 -> 0bXXXXXXXX & 0b00000111 != 0
|
||||
if (Cmp.getPredicate() == ICmpInst::ICMP_ULT &&
|
||||
C.uge(1) && C.ule(BitWidth)) {
|
||||
APInt Mask = APInt::getLowBitsSet(BitWidth, C.getLimitedValue());
|
||||
return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_NE,
|
||||
Builder.CreateAnd(II->getArgOperand(0), Mask),
|
||||
ConstantInt::getNullValue(Ty));
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// Handle icmp with constant (but not simple integer constant) RHS.
|
||||
Instruction *InstCombiner::foldICmpInstWithConstantNotInt(ICmpInst &I) {
|
||||
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
|
||||
|
@ -903,7 +903,10 @@ private:
|
||||
Instruction *foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
|
||||
BinaryOperator *BO,
|
||||
const APInt &C);
|
||||
Instruction *foldICmpIntrinsicWithConstant(ICmpInst &ICI, const APInt &C);
|
||||
Instruction *foldICmpIntrinsicWithConstant(ICmpInst &ICI, IntrinsicInst *II,
|
||||
const APInt &C);
|
||||
Instruction *foldICmpEqIntrinsicWithConstant(ICmpInst &ICI, IntrinsicInst *II,
|
||||
const APInt &C);
|
||||
|
||||
// Helpers of visitSelectInst().
|
||||
Instruction *foldSelectExtConst(SelectInst &Sel);
|
||||
|
@ -149,8 +149,7 @@ define i1 @ctlz_ugt_zero_i32(i32 %x) {
|
||||
|
||||
define i1 @ctlz_ugt_one_i32(i32 %x) {
|
||||
; CHECK-LABEL: @ctlz_ugt_one_i32(
|
||||
; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[LZ]], 1
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 1073741824
|
||||
; CHECK-NEXT: ret i1 [[CMP]]
|
||||
;
|
||||
%lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
|
||||
@ -160,8 +159,7 @@ define i1 @ctlz_ugt_one_i32(i32 %x) {
|
||||
|
||||
define i1 @ctlz_ugt_other_i32(i32 %x) {
|
||||
; CHECK-LABEL: @ctlz_ugt_other_i32(
|
||||
; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[LZ]], 16
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 32768
|
||||
; CHECK-NEXT: ret i1 [[CMP]]
|
||||
;
|
||||
%lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
|
||||
@ -173,7 +171,7 @@ define i1 @ctlz_ugt_other_multiuse_i32(i32 %x, i32* %p) {
|
||||
; CHECK-LABEL: @ctlz_ugt_other_multiuse_i32(
|
||||
; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0
|
||||
; CHECK-NEXT: store i32 [[LZ]], i32* [[P:%.*]], align 4
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[LZ]], 16
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X]], 32768
|
||||
; CHECK-NEXT: ret i1 [[CMP]]
|
||||
;
|
||||
%lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
|
||||
@ -184,8 +182,7 @@ define i1 @ctlz_ugt_other_multiuse_i32(i32 %x, i32* %p) {
|
||||
|
||||
define i1 @ctlz_ugt_bw_minus_one_i32(i32 %x) {
|
||||
; CHECK-LABEL: @ctlz_ugt_bw_minus_one_i32(
|
||||
; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[LZ]], 31
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
|
||||
; CHECK-NEXT: ret i1 [[CMP]]
|
||||
;
|
||||
%lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
|
||||
@ -205,8 +202,7 @@ define <2 x i1> @ctlz_ult_one_v2i32(<2 x i32> %x) {
|
||||
|
||||
define <2 x i1> @ctlz_ult_other_v2i32(<2 x i32> %x) {
|
||||
; CHECK-LABEL: @ctlz_ult_other_v2i32(
|
||||
; CHECK-NEXT: [[LZ:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X:%.*]], i1 false)
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[LZ]], <i32 16, i32 16>
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], <i32 65535, i32 65535>
|
||||
; CHECK-NEXT: ret <2 x i1> [[CMP]]
|
||||
;
|
||||
%lz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false)
|
||||
@ -218,7 +214,7 @@ define <2 x i1> @ctlz_ult_other_multiuse_v2i32(<2 x i32> %x, <2 x i32>* %p) {
|
||||
; CHECK-LABEL: @ctlz_ult_other_multiuse_v2i32(
|
||||
; CHECK-NEXT: [[LZ:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X:%.*]], i1 false)
|
||||
; CHECK-NEXT: store <2 x i32> [[LZ]], <2 x i32>* [[P:%.*]], align 8
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[LZ]], <i32 16, i32 16>
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X]], <i32 65535, i32 65535>
|
||||
; CHECK-NEXT: ret <2 x i1> [[CMP]]
|
||||
;
|
||||
%lz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false)
|
||||
@ -229,8 +225,7 @@ define <2 x i1> @ctlz_ult_other_multiuse_v2i32(<2 x i32> %x, <2 x i32>* %p) {
|
||||
|
||||
define <2 x i1> @ctlz_ult_bw_minus_one_v2i32(<2 x i32> %x) {
|
||||
; CHECK-LABEL: @ctlz_ult_bw_minus_one_v2i32(
|
||||
; CHECK-NEXT: [[LZ:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X:%.*]], i1 false)
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[LZ]], <i32 31, i32 31>
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], <i32 1, i32 1>
|
||||
; CHECK-NEXT: ret <2 x i1> [[CMP]]
|
||||
;
|
||||
%lz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false)
|
||||
@ -240,8 +235,7 @@ define <2 x i1> @ctlz_ult_bw_minus_one_v2i32(<2 x i32> %x) {
|
||||
|
||||
define <2 x i1> @ctlz_ult_bitwidth_v2i32(<2 x i32> %x) {
|
||||
; CHECK-LABEL: @ctlz_ult_bitwidth_v2i32(
|
||||
; CHECK-NEXT: [[LZ:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X:%.*]], i1 false)
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[LZ]], <i32 32, i32 32>
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], zeroinitializer
|
||||
; CHECK-NEXT: ret <2 x i1> [[CMP]]
|
||||
;
|
||||
%lz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false)
|
||||
@ -359,8 +353,8 @@ define i1 @cttz_ugt_zero_i33(i33 %x) {
|
||||
|
||||
define i1 @cttz_ugt_one_i33(i33 %x) {
|
||||
; CHECK-LABEL: @cttz_ugt_one_i33(
|
||||
; CHECK-NEXT: [[TZ:%.*]] = tail call i33 @llvm.cttz.i33(i33 [[X:%.*]], i1 false), !range !1
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i33 [[TZ]], 1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = and i33 [[X:%.*]], 3
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i33 [[TMP1]], 0
|
||||
; CHECK-NEXT: ret i1 [[CMP]]
|
||||
;
|
||||
%tz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false)
|
||||
@ -370,8 +364,8 @@ define i1 @cttz_ugt_one_i33(i33 %x) {
|
||||
|
||||
define i1 @cttz_ugt_other_i33(i33 %x) {
|
||||
; CHECK-LABEL: @cttz_ugt_other_i33(
|
||||
; CHECK-NEXT: [[TZ:%.*]] = tail call i33 @llvm.cttz.i33(i33 [[X:%.*]], i1 false), !range !1
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i33 [[TZ]], 16
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = and i33 [[X:%.*]], 131071
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i33 [[TMP1]], 0
|
||||
; CHECK-NEXT: ret i1 [[CMP]]
|
||||
;
|
||||
%tz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false)
|
||||
@ -394,8 +388,7 @@ define i1 @cttz_ugt_other_multiuse_i33(i33 %x, i33* %p) {
|
||||
|
||||
define i1 @cttz_ugt_bw_minus_one_i33(i33 %x) {
|
||||
; CHECK-LABEL: @cttz_ugt_bw_minus_one_i33(
|
||||
; CHECK-NEXT: [[TZ:%.*]] = tail call i33 @llvm.cttz.i33(i33 [[X:%.*]], i1 false), !range !1
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i33 [[TZ]], 32
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i33 [[X:%.*]], 0
|
||||
; CHECK-NEXT: ret i1 [[CMP]]
|
||||
;
|
||||
%tz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false)
|
||||
@ -415,8 +408,8 @@ define <2 x i1> @cttz_ult_one_v2i32(<2 x i32> %x) {
|
||||
|
||||
define <2 x i1> @cttz_ult_other_v2i32(<2 x i32> %x) {
|
||||
; CHECK-LABEL: @cttz_ult_other_v2i32(
|
||||
; CHECK-NEXT: [[TZ:%.*]] = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X:%.*]], i1 false)
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TZ]], <i32 16, i32 16>
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 65535, i32 65535>
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
|
||||
; CHECK-NEXT: ret <2 x i1> [[CMP]]
|
||||
;
|
||||
%tz = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %x, i1 false)
|
||||
@ -439,8 +432,8 @@ define <2 x i1> @cttz_ult_other_multiuse_v2i32(<2 x i32> %x, <2 x i32>* %p) {
|
||||
|
||||
define <2 x i1> @cttz_ult_bw_minus_one_v2i32(<2 x i32> %x) {
|
||||
; CHECK-LABEL: @cttz_ult_bw_minus_one_v2i32(
|
||||
; CHECK-NEXT: [[TZ:%.*]] = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X:%.*]], i1 false)
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TZ]], <i32 31, i32 31>
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 2147483647, i32 2147483647>
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
|
||||
; CHECK-NEXT: ret <2 x i1> [[CMP]]
|
||||
;
|
||||
%tz = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %x, i1 false)
|
||||
@ -450,8 +443,7 @@ define <2 x i1> @cttz_ult_bw_minus_one_v2i32(<2 x i32> %x) {
|
||||
|
||||
define <2 x i1> @cttz_ult_bitwidth_v2i32(<2 x i32> %x) {
|
||||
; CHECK-LABEL: @cttz_ult_bitwidth_v2i32(
|
||||
; CHECK-NEXT: [[TZ:%.*]] = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X:%.*]], i1 false)
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TZ]], <i32 32, i32 32>
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], zeroinitializer
|
||||
; CHECK-NEXT: ret <2 x i1> [[CMP]]
|
||||
;
|
||||
%tz = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %x, i1 false)
|
||||
|
Loading…
Reference in New Issue
Block a user