mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[InstCombine] add unsigned saturation subtraction canonicalizations
This is the instcombine part of unsigned saturation canonicalization. Backend patches already commited: https://reviews.llvm.org/D37510 https://reviews.llvm.org/D37534 It converts unsigned saturated subtraction patterns to forms recognized by the backend: (a > b) ? a - b : 0 -> ((a > b) ? a : b) - b) (b < a) ? a - b : 0 -> ((a > b) ? a : b) - b) (b > a) ? 0 : a - b -> ((a > b) ? a : b) - b) (a < b) ? 0 : a - b -> ((a > b) ? a : b) - b) ((a > b) ? b - a : 0) -> - ((a > b) ? a : b) - b) ((b < a) ? b - a : 0) -> - ((a > b) ? a : b) - b) ((b > a) ? 0 : b - a) -> - ((a > b) ? a : b) - b) ((a < b) ? 0 : b - a) -> - ((a > b) ? a : b) - b) Patch by Yulia Koval! Differential Revision: https://reviews.llvm.org/D41480 llvm-svn: 324255
This commit is contained in:
parent
c85b081732
commit
01bec64eab
@ -538,6 +538,59 @@ static Value *foldSelectICmpAndOr(const ICmpInst *IC, Value *TrueVal,
|
||||
return Builder.CreateOr(V, Y);
|
||||
}
|
||||
|
||||
/// Transform patterns such as: (a > b) ? a - b : 0
|
||||
/// into: ((a > b) ? a : b) - b)
|
||||
/// This produces a canonical max pattern that is more easily recognized by the
|
||||
/// backend and converted into saturated subtraction instructions if those
|
||||
/// exist.
|
||||
/// There are 8 commuted/swapped variants of this pattern.
|
||||
/// TODO: Also support a - UMIN(a,b) patterns.
|
||||
static Value *canonicalizeSaturatedSubtract(const ICmpInst *ICI,
|
||||
const Value *TrueVal,
|
||||
const Value *FalseVal,
|
||||
InstCombiner::BuilderTy &Builder) {
|
||||
ICmpInst::Predicate Pred = ICI->getPredicate();
|
||||
if (!ICmpInst::isUnsigned(Pred))
|
||||
return nullptr;
|
||||
|
||||
// (b > a) ? 0 : a - b -> (b <= a) ? a - b : 0
|
||||
if (match(TrueVal, m_Zero())) {
|
||||
Pred = ICmpInst::getInversePredicate(Pred);
|
||||
std::swap(TrueVal, FalseVal);
|
||||
}
|
||||
if (!match(FalseVal, m_Zero()))
|
||||
return nullptr;
|
||||
|
||||
Value *A = ICI->getOperand(0);
|
||||
Value *B = ICI->getOperand(1);
|
||||
if (Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_ULT) {
|
||||
// (b < a) ? a - b : 0 -> (a > b) ? a - b : 0
|
||||
std::swap(A, B);
|
||||
Pred = ICmpInst::getSwappedPredicate(Pred);
|
||||
}
|
||||
|
||||
assert((Pred == ICmpInst::ICMP_UGE || Pred == ICmpInst::ICMP_UGT) &&
|
||||
"Unexpected isUnsigned predicate!");
|
||||
|
||||
// Account for swapped form of subtraction: ((a > b) ? b - a : 0).
|
||||
bool IsNegative = false;
|
||||
if (match(TrueVal, m_Sub(m_Specific(B), m_Specific(A))))
|
||||
IsNegative = true;
|
||||
else if (!match(TrueVal, m_Sub(m_Specific(A), m_Specific(B))))
|
||||
return nullptr;
|
||||
|
||||
// If sub is used anywhere else, we wouldn't be able to eliminate it
|
||||
// afterwards.
|
||||
if (!TrueVal->hasOneUse())
|
||||
return nullptr;
|
||||
|
||||
// All checks passed, convert to canonical unsigned saturated subtraction
|
||||
// form: sub(max()).
|
||||
// (a > b) ? a - b : 0 -> ((a > b) ? a : b) - b)
|
||||
Value *Max = Builder.CreateSelect(Builder.CreateICmp(Pred, A, B), A, B);
|
||||
return IsNegative ? Builder.CreateSub(B, Max) : Builder.CreateSub(Max, B);
|
||||
}
|
||||
|
||||
/// Attempt to fold a cttz/ctlz followed by a icmp plus select into a single
|
||||
/// call to cttz/ctlz with flag 'is_zero_undef' cleared.
|
||||
///
|
||||
@ -860,10 +913,12 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI,
|
||||
if (Value *V = foldSelectCttzCtlz(ICI, TrueVal, FalseVal, Builder))
|
||||
return replaceInstUsesWith(SI, V);
|
||||
|
||||
if (Value *V = canonicalizeSaturatedSubtract(ICI, TrueVal, FalseVal, Builder))
|
||||
return replaceInstUsesWith(SI, V);
|
||||
|
||||
return Changed ? &SI : nullptr;
|
||||
}
|
||||
|
||||
|
||||
/// SI is a select whose condition is a PHI node (but the two may be in
|
||||
/// different blocks). See if the true/false values (V) are live in all of the
|
||||
/// predecessor blocks of the PHI. For example, cases like this can't be mapped:
|
||||
|
@ -12,10 +12,10 @@ declare void @use(i64)
|
||||
|
||||
define i64 @max_sub_ugt(i64 %a, i64 %b) {
|
||||
; CHECK-LABEL: @max_sub_ugt(
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]]
|
||||
; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[A]], [[B]]
|
||||
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i64 [[SUB]], i64 0
|
||||
; CHECK-NEXT: ret i64 [[SEL]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[A]], i64 [[B]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]]
|
||||
; CHECK-NEXT: ret i64 [[TMP3]]
|
||||
;
|
||||
%cmp = icmp ugt i64 %a, %b
|
||||
%sub = sub i64 %a, %b
|
||||
@ -23,15 +23,30 @@ define i64 @max_sub_ugt(i64 %a, i64 %b) {
|
||||
ret i64 %sel
|
||||
}
|
||||
|
||||
; (a >= b) ? a - b : 0 -> ((a >= b) ? a : b) - b)
|
||||
|
||||
define i64 @max_sub_uge(i64 %a, i64 %b) {
|
||||
; CHECK-LABEL: @max_sub_uge(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]]
|
||||
; CHECK-NEXT: ret i64 [[TMP3]]
|
||||
;
|
||||
%cmp = icmp uge i64 %a, %b
|
||||
%sub = sub i64 %a, %b
|
||||
%sel = select i1 %cmp, i64 %sub ,i64 0
|
||||
ret i64 %sel
|
||||
}
|
||||
|
||||
; Again, with vectors:
|
||||
; (a > b) ? a - b : 0 -> ((a > b) ? a : b) - b)
|
||||
|
||||
define <4 x i32> @max_sub_ugt_vec(<4 x i32> %a, <4 x i32> %b) {
|
||||
; CHECK-LABEL: @max_sub_ugt_vec(
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[B:%.*]]
|
||||
; CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A]], [[B]]
|
||||
; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[SUB]], <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: ret <4 x i32> [[SEL]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[B:%.*]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A]], <4 x i32> [[B]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[TMP2]], [[B]]
|
||||
; CHECK-NEXT: ret <4 x i32> [[TMP3]]
|
||||
;
|
||||
%cmp = icmp ugt <4 x i32> %a, %b
|
||||
%sub = sub <4 x i32> %a, %b
|
||||
@ -44,12 +59,12 @@ define <4 x i32> @max_sub_ugt_vec(<4 x i32> %a, <4 x i32> %b) {
|
||||
|
||||
define i64 @max_sub_ult(i64 %a, i64 %b) {
|
||||
; CHECK-LABEL: @max_sub_ult(
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[B:%.*]], [[A:%.*]]
|
||||
; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[A]], [[B]]
|
||||
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i64 [[SUB]], i64 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[B:%.*]], [[A:%.*]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[A]], i64 [[B]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]]
|
||||
; CHECK-NEXT: [[EXTRASUB:%.*]] = sub i64 [[B]], [[A]]
|
||||
; CHECK-NEXT: call void @use(i64 [[EXTRASUB]])
|
||||
; CHECK-NEXT: ret i64 [[SEL]]
|
||||
; CHECK-NEXT: ret i64 [[TMP3]]
|
||||
;
|
||||
%cmp = icmp ult i64 %b, %a
|
||||
%sub = sub i64 %a, %b
|
||||
@ -63,12 +78,12 @@ define i64 @max_sub_ult(i64 %a, i64 %b) {
|
||||
|
||||
define i64 @max_sub_ugt_sel_swapped(i64 %a, i64 %b) {
|
||||
; CHECK-LABEL: @max_sub_ugt_sel_swapped(
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[B:%.*]], [[A:%.*]]
|
||||
; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[A]], [[B]]
|
||||
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i64 0, i64 [[SUB]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[B:%.*]], [[A:%.*]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]]
|
||||
; CHECK-NEXT: [[EXTRASUB:%.*]] = sub i64 [[B]], [[A]]
|
||||
; CHECK-NEXT: call void @use(i64 [[EXTRASUB]])
|
||||
; CHECK-NEXT: ret i64 [[SEL]]
|
||||
; CHECK-NEXT: ret i64 [[TMP3]]
|
||||
;
|
||||
%cmp = icmp ugt i64 %b, %a
|
||||
%sub = sub i64 %a, %b
|
||||
@ -82,10 +97,10 @@ define i64 @max_sub_ugt_sel_swapped(i64 %a, i64 %b) {
|
||||
|
||||
define i64 @max_sub_ult_sel_swapped(i64 %a, i64 %b) {
|
||||
; CHECK-LABEL: @max_sub_ult_sel_swapped(
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]]
|
||||
; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[A]], [[B]]
|
||||
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i64 0, i64 [[SUB]]
|
||||
; CHECK-NEXT: ret i64 [[SEL]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]]
|
||||
; CHECK-NEXT: ret i64 [[TMP3]]
|
||||
;
|
||||
%cmp = icmp ult i64 %a, %b
|
||||
%sub = sub i64 %a, %b
|
||||
@ -97,12 +112,12 @@ define i64 @max_sub_ult_sel_swapped(i64 %a, i64 %b) {
|
||||
|
||||
define i64 @neg_max_sub_ugt(i64 %a, i64 %b) {
|
||||
; CHECK-LABEL: @neg_max_sub_ugt(
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]]
|
||||
; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[B]], [[A]]
|
||||
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i64 [[SUB]], i64 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[A]], i64 [[B]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[B]], [[TMP2]]
|
||||
; CHECK-NEXT: [[EXTRASUB:%.*]] = sub i64 [[A]], [[B]]
|
||||
; CHECK-NEXT: call void @use(i64 [[EXTRASUB]])
|
||||
; CHECK-NEXT: ret i64 [[SEL]]
|
||||
; CHECK-NEXT: ret i64 [[TMP3]]
|
||||
;
|
||||
%cmp = icmp ugt i64 %a, %b
|
||||
%sub = sub i64 %b, %a
|
||||
@ -116,10 +131,10 @@ define i64 @neg_max_sub_ugt(i64 %a, i64 %b) {
|
||||
|
||||
define i64 @neg_max_sub_ult(i64 %a, i64 %b) {
|
||||
; CHECK-LABEL: @neg_max_sub_ult(
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[B:%.*]], [[A:%.*]]
|
||||
; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[B]], [[A]]
|
||||
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i64 [[SUB]], i64 0
|
||||
; CHECK-NEXT: ret i64 [[SEL]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[A]], i64 [[B]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[B]], [[TMP2]]
|
||||
; CHECK-NEXT: ret i64 [[TMP3]]
|
||||
;
|
||||
%cmp = icmp ult i64 %b, %a
|
||||
%sub = sub i64 %b, %a
|
||||
@ -131,10 +146,10 @@ define i64 @neg_max_sub_ult(i64 %a, i64 %b) {
|
||||
|
||||
define i64 @neg_max_sub_ugt_sel_swapped(i64 %a, i64 %b) {
|
||||
; CHECK-LABEL: @neg_max_sub_ugt_sel_swapped(
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[B:%.*]], [[A:%.*]]
|
||||
; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[B]], [[A]]
|
||||
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i64 0, i64 [[SUB]]
|
||||
; CHECK-NEXT: ret i64 [[SEL]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[B]], [[TMP2]]
|
||||
; CHECK-NEXT: ret i64 [[TMP3]]
|
||||
;
|
||||
%cmp = icmp ugt i64 %b, %a
|
||||
%sub = sub i64 %b, %a
|
||||
@ -146,12 +161,12 @@ define i64 @neg_max_sub_ugt_sel_swapped(i64 %a, i64 %b) {
|
||||
|
||||
define i64 @neg_max_sub_ult_sel_swapped(i64 %a, i64 %b) {
|
||||
; CHECK-LABEL: @neg_max_sub_ult_sel_swapped(
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]]
|
||||
; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[B]], [[A]]
|
||||
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i64 0, i64 [[SUB]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[B]], [[TMP2]]
|
||||
; CHECK-NEXT: [[EXTRASUB:%.*]] = sub i64 [[A]], [[B]]
|
||||
; CHECK-NEXT: call void @use(i64 [[EXTRASUB]])
|
||||
; CHECK-NEXT: ret i64 [[SEL]]
|
||||
; CHECK-NEXT: ret i64 [[TMP3]]
|
||||
;
|
||||
%cmp = icmp ult i64 %a, %b
|
||||
%sub = sub i64 %b, %a
|
||||
|
Loading…
x
Reference in New Issue
Block a user