mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 02:33:06 +01:00
Revert "[SCEV] Model ashr exact x, C as (abs(x) EXACT/u (1<<C)) * signum(x)"
As discussed in https://reviews.llvm.org/D100721, this modelling is lossy: we can't reconstruct `ashr`/`ashr exact` from it, which means that whenever we actually expand the IR, we've just pessimized the code. It would be good to model this pattern, after all it comes up every time you want to compute a distance between two pointers, but not at this cost. This reverts commit ec54867df5e7f20e12146e628af34f0384308bcb.
This commit is contained in:
parent
79c9c7433b
commit
5a174914f6
@ -575,7 +575,6 @@ public:
|
||||
const SCEV *getGEPExpr(GEPOperator *GEP,
|
||||
const SmallVectorImpl<const SCEV *> &IndexExprs);
|
||||
const SCEV *getAbsExpr(const SCEV *Op, bool IsNSW);
|
||||
const SCEV *getSignumExpr(const SCEV *Op);
|
||||
const SCEV *getMinMaxExpr(SCEVTypes Kind,
|
||||
SmallVectorImpl<const SCEV *> &Operands);
|
||||
const SCEV *getSMaxExpr(const SCEV *LHS, const SCEV *RHS);
|
||||
|
@ -3503,11 +3503,6 @@ const SCEV *ScalarEvolution::getAbsExpr(const SCEV *Op, bool IsNSW) {
|
||||
return getSMaxExpr(Op, getNegativeSCEV(Op, Flags));
|
||||
}
|
||||
|
||||
const SCEV *ScalarEvolution::getSignumExpr(const SCEV *Op) {
|
||||
Type *Ty = Op->getType();
|
||||
return getSMinExpr(getSMaxExpr(Op, getMinusOne(Ty)), getOne(Ty));
|
||||
}
|
||||
|
||||
const SCEV *ScalarEvolution::getMinMaxExpr(SCEVTypes Kind,
|
||||
SmallVectorImpl<const SCEV *> &Ops) {
|
||||
assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!");
|
||||
@ -4559,7 +4554,6 @@ struct BinaryOp {
|
||||
Value *RHS;
|
||||
bool IsNSW = false;
|
||||
bool IsNUW = false;
|
||||
bool IsExact = false;
|
||||
|
||||
/// Op is set if this BinaryOp corresponds to a concrete LLVM instruction or
|
||||
/// constant expression.
|
||||
@ -4572,14 +4566,11 @@ struct BinaryOp {
|
||||
IsNSW = OBO->hasNoSignedWrap();
|
||||
IsNUW = OBO->hasNoUnsignedWrap();
|
||||
}
|
||||
if (auto *PEO = dyn_cast<PossiblyExactOperator>(Op))
|
||||
IsExact = PEO->isExact();
|
||||
}
|
||||
|
||||
explicit BinaryOp(unsigned Opcode, Value *LHS, Value *RHS, bool IsNSW = false,
|
||||
bool IsNUW = false, bool IsExact = false)
|
||||
: Opcode(Opcode), LHS(LHS), RHS(RHS), IsNSW(IsNSW), IsNUW(IsNUW),
|
||||
IsExact(IsExact) {}
|
||||
bool IsNUW = false)
|
||||
: Opcode(Opcode), LHS(LHS), RHS(RHS), IsNSW(IsNSW), IsNUW(IsNUW) {}
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
@ -6745,15 +6736,6 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
|
||||
}
|
||||
}
|
||||
}
|
||||
if (BO->IsExact) {
|
||||
// Given exact arithmetic in-bounds right-shift by a constant,
|
||||
// we can lower it into: (abs(x) EXACT/u (1<<C)) * signum(x)
|
||||
const SCEV *X = getSCEV(BO->LHS);
|
||||
const SCEV *AbsX = getAbsExpr(X, /*IsNSW=*/false);
|
||||
APInt Mult = APInt::getOneBitSet(BitWidth, AShrAmt);
|
||||
const SCEV *Div = getUDivExactExpr(AbsX, getConstant(Mult));
|
||||
return getMulExpr(Div, getSignumExpr(X), SCEV::FlagNSW);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -42,7 +42,7 @@ define i32 @t3(i32 %x, i32 %y) {
|
||||
; CHECK-LABEL: 't3'
|
||||
; CHECK-NEXT: Classifying expressions for: @t3
|
||||
; CHECK-NEXT: %i0 = ashr exact i32 %x, 4
|
||||
; CHECK-NEXT: --> ((((-1 * %x) smax %x) /u 16) * (1 smin (-1 smax %x)))<nsw> U: [-268435455,268435456) S: [-268435455,268435456)
|
||||
; CHECK-NEXT: --> %i0 U: [-134217728,134217728) S: [-134217728,134217728)
|
||||
; CHECK-NEXT: Determining loop execution counts for: @t3
|
||||
;
|
||||
%i0 = ashr exact i32 %x, 4
|
||||
@ -65,7 +65,7 @@ define i32 @t5(i32 %x, i32 %y) {
|
||||
; CHECK-LABEL: 't5'
|
||||
; CHECK-NEXT: Classifying expressions for: @t5
|
||||
; CHECK-NEXT: %i0 = ashr exact i32 %x, 5
|
||||
; CHECK-NEXT: --> ((((-1 * %x) smax %x) /u 32) * (1 smin (-1 smax %x)))<nsw> U: [-134217727,134217728) S: [-134217727,134217728)
|
||||
; CHECK-NEXT: --> %i0 U: [-67108864,67108864) S: [-67108864,67108864)
|
||||
; CHECK-NEXT: Determining loop execution counts for: @t5
|
||||
;
|
||||
%i0 = ashr exact i32 %x, 5
|
||||
|
@ -473,9 +473,9 @@ define void @pr46786_c26_int(i32* %arg, i32* %arg1, i32* %arg2) {
|
||||
; X64-NEXT: %i10 = sub i64 %i9, %i4
|
||||
; X64-NEXT: --> {0,+,4}<nw><%bb6> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: (4 * ((-4 + (-1 * %arg) + %arg1) /u 4))<nuw> LoopDispositions: { %bb6: Computable }
|
||||
; X64-NEXT: %i11 = ashr exact i64 %i10, 2
|
||||
; X64-NEXT: --> ((({0,+,4}<nw><%bb6> smax {0,+,-4}<nw><%bb6>) /u 4) * (1 smin (-1 smax {0,+,4}<nw><%bb6>)))<nsw> U: [-4611686018427387903,4611686018427387904) S: [-4611686018427387903,4611686018427387904) Exits: ((((4 * ((-4 + (-1 * %arg) + %arg1) /u 4))<nuw> smax (-4 * ((-4 + (-1 * %arg) + %arg1) /u 4))) /u 4) * (1 smin (-1 smax (4 * ((-4 + (-1 * %arg) + %arg1) /u 4))<nuw>)))<nsw> LoopDispositions: { %bb6: Computable }
|
||||
; X64-NEXT: --> %i11 U: [-2305843009213693952,2305843009213693952) S: [-2305843009213693952,2305843009213693952) Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
|
||||
; X64-NEXT: %i12 = getelementptr inbounds i32, i32* %arg2, i64 %i11
|
||||
; X64-NEXT: --> ((4 * (({0,+,4}<nw><%bb6> smax {0,+,-4}<nw><%bb6>) /u 4) * (1 smin (-1 smax {0,+,4}<nw><%bb6>))) + %arg2) U: full-set S: full-set Exits: ((4 * (((4 * ((-4 + (-1 * %arg) + %arg1) /u 4))<nuw> smax (-4 * ((-4 + (-1 * %arg) + %arg1) /u 4))) /u 4) * (1 smin (-1 smax (4 * ((-4 + (-1 * %arg) + %arg1) /u 4))<nuw>))) + %arg2) LoopDispositions: { %bb6: Computable }
|
||||
; X64-NEXT: --> ((4 * %i11)<nsw> + %arg2) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
|
||||
; X64-NEXT: %i13 = load i32, i32* %i12, align 4
|
||||
; X64-NEXT: --> %i13 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
|
||||
; X64-NEXT: %i14 = add nsw i32 %i13, %i8
|
||||
@ -502,9 +502,9 @@ define void @pr46786_c26_int(i32* %arg, i32* %arg1, i32* %arg2) {
|
||||
; X32-NEXT: %i10 = sub i64 %i9, %i4
|
||||
; X32-NEXT: --> {0,+,4}<nw><%bb6> U: [0,4294967293) S: [0,4294967293) Exits: (4 * ((zext i32* (-4 + (-1 * %arg) + %arg1) to i64) /u 4))<nuw><nsw> LoopDispositions: { %bb6: Computable }
|
||||
; X32-NEXT: %i11 = ashr exact i64 %i10, 2
|
||||
; X32-NEXT: --> ({0,+,1}<nw><%bb6> * (1 smin {0,+,4}<nuw><nsw><%bb6>))<nuw><nsw> U: [0,1073741824) S: [0,1073741824) Exits: (((zext i32* (-4 + (-1 * %arg) + %arg1) to i64) /u 4) * (1 smin (4 * ((zext i32* (-4 + (-1 * %arg) + %arg1) to i64) /u 4))<nuw><nsw>))<nuw><nsw> LoopDispositions: { %bb6: Computable }
|
||||
; X32-NEXT: --> %i11 U: [-2147483648,2147483648) S: [-2147483648,2147483648) Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
|
||||
; X32-NEXT: %i12 = getelementptr inbounds i32, i32* %arg2, i64 %i11
|
||||
; X32-NEXT: --> (((trunc i64 (1 smin {0,+,4}<nuw><nsw><%bb6>) to i32) * {0,+,4}<%bb6>) + %arg2) U: full-set S: full-set Exits: ((4 * (trunc i64 (1 smin (4 * ((zext i32* (-4 + (-1 * %arg) + %arg1) to i64) /u 4))<nuw><nsw>) to i32) * ((-4 + (-1 * %arg) + %arg1) /u 4)) + %arg2) LoopDispositions: { %bb6: Computable }
|
||||
; X32-NEXT: --> ((4 * (trunc i64 %i11 to i32))<nsw> + %arg2) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
|
||||
; X32-NEXT: %i13 = load i32, i32* %i12, align 4
|
||||
; X32-NEXT: --> %i13 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
|
||||
; X32-NEXT: %i14 = add nsw i32 %i13, %i8
|
||||
|
@ -6,13 +6,8 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
|
||||
define float @ashr_expansion_valid(i64 %x, float* %ptr) {
|
||||
; CHECK-LABEL: @ashr_expansion_valid(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[X:%.*]], i64 -1)
|
||||
; CHECK-NEXT: [[SMIN:%.*]] = call i64 @llvm.smin.i64(i64 [[SMAX]], i64 1)
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 0, [[X]]
|
||||
; CHECK-NEXT: [[SMAX1:%.*]] = call i64 @llvm.smax.i64(i64 [[X]], i64 [[TMP0]])
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[SMAX1]], 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = mul nsw i64 [[SMIN]], [[TMP1]]
|
||||
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP2]], i64 1)
|
||||
; CHECK-NEXT: [[BOUND:%.*]] = ashr exact i64 [[X:%.*]], 4
|
||||
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[BOUND]], i64 1)
|
||||
; CHECK-NEXT: br label [[LOOP:%.*]]
|
||||
; CHECK: loop:
|
||||
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
|
||||
|
Loading…
Reference in New Issue
Block a user