From a7d424d173649223d2722d107a5d47c3a0c71347 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 27 Apr 2021 15:14:29 -0400 Subject: [PATCH] [InstCombine] fold clamp to 2 values from min/max intrinsics The "select" versions of these folds are also missing and can cause infinite loops as shown in: https://llvm.org/PR48900 ...but it seems easier to match these as max/min as a first fix. https://alive2.llvm.org/ce/z/wv-_dT --- .../InstCombine/InstCombineCalls.cpp | 44 +++++++++++++++++++ .../InstCombine/minmax-intrinsics.ll | 28 ++++++++---- 2 files changed, 64 insertions(+), 8 deletions(-) diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 72008dfa3b2..262c2aa9694 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -698,6 +698,47 @@ static Optional<bool> getKnownSign(Value *Op, Instruction *CxtI, ICmpInst::ICMP_SLT, Op, Constant::getNullValue(Op->getType()), CxtI, DL); } +/// If we have a clamp pattern like max (min X, 42), 41 -- where the output +/// can only be one of two possible constant values -- turn that into a select +/// of constants. 
+static Instruction *foldClampRangeOfTwo(IntrinsicInst *II, + InstCombiner::BuilderTy &Builder) { + Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1); + Value *X; + const APInt *C0, *C1; + if (!match(I1, m_APInt(C1)) || !I0->hasOneUse()) + return nullptr; + + CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE; + switch (II->getIntrinsicID()) { + case Intrinsic::smax: + if (match(I0, m_SMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1) + Pred = ICmpInst::ICMP_SGT; + break; + case Intrinsic::smin: + if (match(I0, m_SMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1) + Pred = ICmpInst::ICMP_SLT; + break; + case Intrinsic::umax: + if (match(I0, m_UMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1) + Pred = ICmpInst::ICMP_UGT; + break; + case Intrinsic::umin: + if (match(I0, m_UMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1) + Pred = ICmpInst::ICMP_ULT; + break; + default: + llvm_unreachable("Expected min/max intrinsic"); + } + if (Pred == CmpInst::BAD_ICMP_PREDICATE) + return nullptr; + + // max (min X, 42), 41 --> X > 41 ? 42 : 41 (requires C0 == C1 + 1) + // min (max X, 42), 43 --> X < 43 ? 42 : 43 (requires C1 == C0 + 1) + Value *Cmp = Builder.CreateICmp(Pred, X, I1); + return SelectInst::Create(Cmp, ConstantInt::get(II->getType(), *C0), I1); +} + /// CallInst simplification. This mostly only handles folding of intrinsic /// instructions. For normal calls, it allows visitCallBase to do the heavy /// lifting. 
@@ -942,6 +983,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { return replaceInstUsesWith(CI, Abs); } + if (Instruction *Sel = foldClampRangeOfTwo(II, Builder)) + return Sel; + break; } case Intrinsic::bswap: { diff --git a/test/Transforms/InstCombine/minmax-intrinsics.ll b/test/Transforms/InstCombine/minmax-intrinsics.ll index f447c0a7db0..3b2279a92bf 100644 --- a/test/Transforms/InstCombine/minmax-intrinsics.ll +++ b/test/Transforms/InstCombine/minmax-intrinsics.ll @@ -706,8 +706,8 @@ define i8 @smax_negation_uses(i8 %x, i8 %y) { define i8 @clamp_two_vals_smax_smin(i8 %x) { ; CHECK-LABEL: @clamp_two_vals_smax_smin( -; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 42) -; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.smin.i8(i8 [[M]], i8 43) +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[X:%.*]], 43 +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i8 42, i8 43 ; CHECK-NEXT: ret i8 [[R]] ; %m = call i8 @llvm.smax.i8(i8 %x, i8 42) @@ -717,8 +717,8 @@ define i8 @clamp_two_vals_smax_smin(i8 %x) { define <3 x i8> @clamp_two_vals_smin_smax(<3 x i8> %x) { ; CHECK-LABEL: @clamp_two_vals_smin_smax( -; CHECK-NEXT: [[M:%.*]] = call <3 x i8> @llvm.smin.v3i8(<3 x i8> [[X:%.*]], <3 x i8> ) -; CHECK-NEXT: [[R:%.*]] = call <3 x i8> @llvm.smax.v3i8(<3 x i8> [[M]], <3 x i8> ) +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i8> , <3 x i8> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %m = call <3 x i8> @llvm.smin.v3i8(<3 x i8> %x, <3 x i8> ) @@ -728,8 +728,8 @@ define <3 x i8> @clamp_two_vals_smin_smax(<3 x i8> %x) { define i8 @clamp_two_vals_umax_umin(i8 %x) { ; CHECK-LABEL: @clamp_two_vals_umax_umin( -; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 42) -; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.umin.i8(i8 [[M]], i8 43) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[X:%.*]], 43 +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i8 42, i8 43 ; CHECK-NEXT: ret i8 [[R]] ; %m = call i8 
@llvm.umax.i8(i8 %x, i8 42) @@ -739,8 +739,8 @@ define i8 @clamp_two_vals_umax_umin(i8 %x) { define i8 @clamp_two_vals_umin_umax(i8 %x) { ; CHECK-LABEL: @clamp_two_vals_umin_umax( -; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[X:%.*]], i8 42) -; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.umax.i8(i8 [[M]], i8 41) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X:%.*]], 41 +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i8 42, i8 41 ; CHECK-NEXT: ret i8 [[R]] ; %m = call i8 @llvm.umin.i8(i8 %x, i8 42) @@ -748,6 +748,8 @@ define i8 @clamp_two_vals_umin_umax(i8 %x) { ret i8 %r } +; Negative test - mismatched signs + define i8 @clamp_two_vals_smax_umin(i8 %x) { ; CHECK-LABEL: @clamp_two_vals_smax_umin( ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 42) @@ -759,6 +761,8 @@ define i8 @clamp_two_vals_smax_umin(i8 %x) { ret i8 %r } +; Negative test - wrong range + define i8 @clamp_three_vals_smax_smin(i8 %x) { ; CHECK-LABEL: @clamp_three_vals_smax_smin( ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 42) @@ -770,6 +774,8 @@ define i8 @clamp_three_vals_smax_smin(i8 %x) { ret i8 %r } +; Edge cases are simplified + define i8 @clamp_two_vals_umax_umin_edge(i8 %x) { ; CHECK-LABEL: @clamp_two_vals_umax_umin_edge( ; CHECK-NEXT: ret i8 0 @@ -779,6 +785,8 @@ define i8 @clamp_two_vals_umax_umin_edge(i8 %x) { ret i8 %r } +; Edge cases are simplified + define i8 @clamp_two_vals_umin_umax_edge(i8 %x) { ; CHECK-LABEL: @clamp_two_vals_umin_umax_edge( ; CHECK-NEXT: ret i8 -1 @@ -788,6 +796,8 @@ define i8 @clamp_two_vals_umin_umax_edge(i8 %x) { ret i8 %r } +; Edge cases are simplified + define i8 @clamp_two_vals_smax_smin_edge(i8 %x) { ; CHECK-LABEL: @clamp_two_vals_smax_smin_edge( ; CHECK-NEXT: ret i8 -128 @@ -797,6 +807,8 @@ define i8 @clamp_two_vals_smax_smin_edge(i8 %x) { ret i8 %r } +; Edge cases are simplified + define i8 @clamp_two_vals_smin_smax_edge(i8 %x) { ; CHECK-LABEL: @clamp_two_vals_smin_smax_edge( ; CHECK-NEXT: ret i8 127