1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 19:12:56 +02:00

[InstCombine] move add after umin/umax

In the motivating cases from PR14613:
https://bugs.llvm.org/show_bug.cgi?id=14613
...moving the add enables us to narrow the
min/max which eliminates zext/trunc which
enables signficantly better vectorization.
But that bug is still not completely fixed.

https://rise4fun.com/Alive/5KQ

  Name: umax
  Pre: C1 u>= C0
  %a = add nuw i8 %x, C0
  %cond = icmp ugt i8 %a, C1
  %r = select i1 %cond, i8 %a, i8 C1
  =>
  %c2 = icmp ugt i8 %x, C1-C0
  %u2 = select i1 %c2, i8 %x, i8 C1-C0
  %r = add nuw i8 %u2, C0

  Name: umin
  Pre: C1 u>= C0
  %a = add nuw i32 %x, C0
  %cond = icmp ult i32 %a, C1
  %r = select i1 %cond, i32 %a, i32 C1
  =>
  %c2 = icmp ult i32 %x, C1-C0
  %u2 = select i1 %c2, i32 %x, i32 C1-C0
  %r = add nuw i32 %u2, C0

llvm-svn: 355221
This commit is contained in:
Sanjay Patel 2019-03-01 19:42:40 +00:00
parent cec385e970
commit 177b843b47
2 changed files with 53 additions and 33 deletions

View File

@ -1563,6 +1563,30 @@ static Instruction *foldSelectCmpXchg(SelectInst &SI) {
return nullptr;
}
static Instruction *moveAddAfterMinMax(SelectPatternFlavor SPF, Value *X,
Value *Y,
InstCombiner::BuilderTy &Builder) {
assert (SelectPatternResult::isMinOrMax(SPF) && "Expected min/max pattern");
bool IsUnsigned = SPF == SelectPatternFlavor::SPF_UMIN ||
SPF == SelectPatternFlavor::SPF_UMAX;
// TODO: If InstSimplify could fold all cases where C2 <= C1, we could change
// the constant value check to an assert.
Value *A;
const APInt *C1, *C2;
if (IsUnsigned && match(X, m_NUWAdd(m_Value(A), m_APInt(C1))) &&
match(Y, m_APInt(C2)) && C2->uge(*C1) && X->hasNUses(2)) {
// umin (add nuw A, C1), C2 --> add nuw (umin A, C2 - C1), C1
// umax (add nuw A, C1), C2 --> add nuw (umax A, C2 - C1), C1
Value *NewMinMax = createMinMax(Builder, SPF, A,
ConstantInt::get(X->getType(), *C2 - *C1));
return BinaryOperator::CreateNUW(BinaryOperator::Add, NewMinMax,
ConstantInt::get(X->getType(), *C1));
}
// TODO: Handle SMIN/SMAX (similar to unsigned, but the signed subtraction of
// the constants must not overflow).
return nullptr;
}
/// Reduce a sequence of min/max with a common operand.
static Instruction *factorizeMinMaxTree(SelectPatternFlavor SPF, Value *LHS,
Value *RHS,
@ -1962,6 +1986,9 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (Instruction *I = moveNotAfterMinMax(RHS, LHS))
return I;
if (Instruction *I = moveAddAfterMinMax(SPF, LHS, RHS, Builder))
return I;
if (Instruction *I = factorizeMinMaxTree(SPF, LHS, RHS, Builder))
return I;
}

View File

@ -909,9 +909,9 @@ define float @not_min_of_min(i8 %i, float %x) {
define i32 @add_umin(i32 %x) {
; CHECK-LABEL: @add_umin(
; CHECK-NEXT: [[A:%.*]] = add nuw i32 [[X:%.*]], 15
; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A]], 42
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 42
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X:%.*]], 27
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 27
; CHECK-NEXT: [[R:%.*]] = add nuw nsw i32 [[TMP2]], 15
; CHECK-NEXT: ret i32 [[R]]
;
%a = add nuw i32 %x, 15
@ -922,9 +922,8 @@ define i32 @add_umin(i32 %x) {
define i32 @add_umin_constant_limit(i32 %x) {
; CHECK-LABEL: @add_umin_constant_limit(
; CHECK-NEXT: [[A:%.*]] = add nuw i32 [[X:%.*]], 41
; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A]], 42
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 42
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X:%.*]], 0
; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 41, i32 42
; CHECK-NEXT: ret i32 [[R]]
;
%a = add nuw i32 %x, 41
@ -1008,9 +1007,9 @@ define i32 @add_umin_extra_use(i32 %x, i32* %p) {
define <2 x i16> @add_umin_vec(<2 x i16> %x) {
; CHECK-LABEL: @add_umin_vec(
; CHECK-NEXT: [[A:%.*]] = add nuw <2 x i16> [[X:%.*]], <i16 15, i16 15>
; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i16> [[A]], <i16 240, i16 240>
; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C]], <2 x i16> [[A]], <2 x i16> <i16 240, i16 240>
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i16> [[X:%.*]], <i16 225, i16 225>
; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i16> [[X]], <2 x i16> <i16 225, i16 225>
; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i16> [[TMP2]], <i16 15, i16 15>
; CHECK-NEXT: ret <2 x i16> [[R]]
;
%a = add nuw <2 x i16> %x, <i16 15, i16 15>
@ -1021,9 +1020,9 @@ define <2 x i16> @add_umin_vec(<2 x i16> %x) {
define i37 @add_umax(i37 %x) {
; CHECK-LABEL: @add_umax(
; CHECK-NEXT: [[A:%.*]] = add nuw i37 [[X:%.*]], 5
; CHECK-NEXT: [[C:%.*]] = icmp ugt i37 [[A]], 42
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i37 [[A]], i37 42
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i37 [[X:%.*]], 37
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i37 [[X]], i37 37
; CHECK-NEXT: [[R:%.*]] = add nuw i37 [[TMP2]], 5
; CHECK-NEXT: ret i37 [[R]]
;
%a = add nuw i37 %x, 5
@ -1034,9 +1033,9 @@ define i37 @add_umax(i37 %x) {
define i37 @add_umax_constant_limit(i37 %x) {
; CHECK-LABEL: @add_umax_constant_limit(
; CHECK-NEXT: [[A:%.*]] = add nuw i37 [[X:%.*]], 81
; CHECK-NEXT: [[C:%.*]] = icmp ugt i37 [[A]], 82
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i37 [[A]], i37 82
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i37 [[X:%.*]], 1
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i37 [[X]], i37 1
; CHECK-NEXT: [[R:%.*]] = add nuw i37 [[TMP2]], 81
; CHECK-NEXT: ret i37 [[R]]
;
%a = add nuw i37 %x, 81
@ -1050,9 +1049,7 @@ define i37 @add_umax_constant_limit(i37 %x) {
define i37 @add_umax_simplify(i37 %x) {
; CHECK-LABEL: @add_umax_simplify(
; CHECK-NEXT: [[A:%.*]] = add nuw i37 [[X:%.*]], 42
; CHECK-NEXT: [[C:%.*]] = icmp ugt i37 [[A]], 42
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i37 [[A]], i37 42
; CHECK-NEXT: [[R:%.*]] = add nuw i37 [[X:%.*]], 42
; CHECK-NEXT: ret i37 [[R]]
;
%a = add nuw i37 %x, 42
@ -1124,9 +1121,9 @@ define i32 @add_umax_extra_use(i32 %x, i32* %p) {
define <2 x i33> @add_umax_vec(<2 x i33> %x) {
; CHECK-LABEL: @add_umax_vec(
; CHECK-NEXT: [[A:%.*]] = add nuw <2 x i33> [[X:%.*]], <i33 5, i33 5>
; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i33> [[A]], <i33 240, i33 240>
; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C]], <2 x i33> [[A]], <2 x i33> <i33 240, i33 240>
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i33> [[X:%.*]], <i33 235, i33 235>
; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i33> [[X]], <2 x i33> <i33 235, i33 235>
; CHECK-NEXT: [[R:%.*]] = add nuw <2 x i33> [[TMP2]], <i33 5, i33 5>
; CHECK-NEXT: ret <2 x i33> [[R]]
;
%a = add nuw <2 x i33> %x, <i33 5, i33 5>
@ -1137,12 +1134,10 @@ define <2 x i33> @add_umax_vec(<2 x i33> %x) {
define i8 @PR14613_umin(i8 %x) {
; CHECK-LABEL: @PR14613_umin(
; CHECK-NEXT: [[U4:%.*]] = zext i8 [[X:%.*]] to i32
; CHECK-NEXT: [[U5:%.*]] = add nuw nsw i32 [[U4]], 15
; CHECK-NEXT: [[U6:%.*]] = icmp ult i32 [[U5]], 255
; CHECK-NEXT: [[U7:%.*]] = select i1 [[U6]], i32 [[U5]], i32 255
; CHECK-NEXT: [[R:%.*]] = trunc i32 [[U7]] to i8
; CHECK-NEXT: ret i8 [[R]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[X:%.*]], -16
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 -16
; CHECK-NEXT: [[U7:%.*]] = add i8 [[TMP2]], 15
; CHECK-NEXT: ret i8 [[U7]]
;
%u4 = zext i8 %x to i32
%u5 = add nuw nsw i32 %u4, 15
@ -1154,12 +1149,10 @@ define i8 @PR14613_umin(i8 %x) {
define i8 @PR14613_umax(i8 %x) {
; CHECK-LABEL: @PR14613_umax(
; CHECK-NEXT: [[U4:%.*]] = zext i8 [[X:%.*]] to i32
; CHECK-NEXT: [[U5:%.*]] = add nuw nsw i32 [[U4]], 15
; CHECK-NEXT: [[U6:%.*]] = icmp ugt i32 [[U5]], 255
; CHECK-NEXT: [[U7:%.*]] = select i1 [[U6]], i32 [[U5]], i32 255
; CHECK-NEXT: [[R:%.*]] = trunc i32 [[U7]] to i8
; CHECK-NEXT: ret i8 [[R]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X:%.*]], -16
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 -16
; CHECK-NEXT: [[U7:%.*]] = add nsw i8 [[TMP2]], 15
; CHECK-NEXT: ret i8 [[U7]]
;
%u4 = zext i8 %x to i32
%u5 = add nuw nsw i32 %u4, 15