mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-20 19:42:54 +02:00
26cf2128b6
Summary: While `-div-rem-pairs` pass can decompose rem in div+rem pair when div-rem pair is unsupported by target, nothing performs the opposite fold. We can't do that in InstCombine or DAGCombine since neither of those has access to TTI. So it makes most sense to teach `-div-rem-pairs` about it. If we matched rem in expanded form, we know we will be able to place div-rem pair next to each other so we won't regress the situation. Also, we shouldn't decompose rem if we matched already-decomposed form. This is surprisingly straight-forward otherwise. The original patch was committed in rL367288 but was reverted in rL367289 because it exposed pre-existing RAUW issues in internal data structures of the pass; those now have been addressed in a previous patch. https://bugs.llvm.org/show_bug.cgi?id=42673 Reviewers: spatel, RKSimon, efriedma, ZaMaZaN4iK, bogner Reviewed By: bogner Subscribers: bogner, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D65298 llvm-svn: 367419
75 lines
2.6 KiB
C++
75 lines
2.6 KiB
C++
//===- llvm/Transforms/Utils/BypassSlowDivision.h ---------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains an optimization for div and rem on architectures that
// execute short instructions significantly faster than longer instructions.
// For example, on Intel Atom 32-bit divides are slow enough that during
// runtime it is profitable to check the value of the operands, and if they are
// positive and less than 256 use an unsigned 8-bit divide.
//
//===----------------------------------------------------------------------===//
|
#ifndef LLVM_TRANSFORMS_UTILS_BYPASSSLOWDIVISION_H
|
|
#define LLVM_TRANSFORMS_UTILS_BYPASSSLOWDIVISION_H
|
|
|
|
#include "llvm/ADT/DenseMap.h"
|
|
#include "llvm/ADT/DenseMapInfo.h"
|
|
#include "llvm/IR/ValueHandle.h"
|
|
#include <cstdint>
|
|
|
|
namespace llvm {
|
|
|
|
class BasicBlock;
|
|
class Value;
|
|
|
|
struct DivRemMapKey {
|
|
bool SignedOp;
|
|
AssertingVH<Value> Dividend;
|
|
AssertingVH<Value> Divisor;
|
|
|
|
DivRemMapKey() = default;
|
|
|
|
DivRemMapKey(bool InSignedOp, Value *InDividend, Value *InDivisor)
|
|
: SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
|
|
};
|
|
|
|
template <> struct DenseMapInfo<DivRemMapKey> {
|
|
static bool isEqual(const DivRemMapKey &Val1, const DivRemMapKey &Val2) {
|
|
return Val1.SignedOp == Val2.SignedOp && Val1.Dividend == Val2.Dividend &&
|
|
Val1.Divisor == Val2.Divisor;
|
|
}
|
|
|
|
static DivRemMapKey getEmptyKey() {
|
|
return DivRemMapKey(false, nullptr, nullptr);
|
|
}
|
|
|
|
static DivRemMapKey getTombstoneKey() {
|
|
return DivRemMapKey(true, nullptr, nullptr);
|
|
}
|
|
|
|
static unsigned getHashValue(const DivRemMapKey &Val) {
|
|
return (unsigned)(reinterpret_cast<uintptr_t>(
|
|
static_cast<Value *>(Val.Dividend)) ^
|
|
reinterpret_cast<uintptr_t>(
|
|
static_cast<Value *>(Val.Divisor))) ^
|
|
(unsigned)Val.SignedOp;
|
|
}
|
|
};
|
|
|
|
/// This optimization identifies DIV instructions in a BB that can be
/// profitably bypassed and carried out with a shorter, faster divide.
///
/// This optimization may add basic blocks immediately after BB; for obvious
/// reasons, you shouldn't pass those blocks to bypassSlowDivision.
///
/// \param BB          the block to scan for bypassable div/rem instructions.
/// \param BypassWidth presumably maps a slow integer bit width to the
///                    narrower width to try at runtime (e.g. 32 -> 8, per the
///                    file header's Atom example) — confirm against the
///                    definition in BypassSlowDivision.cpp.
/// \returns NOTE(review): likely true iff BB was modified, following the
///          usual LLVM transform convention — verify in the .cpp.
bool bypassSlowDivision(
    BasicBlock *BB, const DenseMap<unsigned int, unsigned int> &BypassWidth);
|
|
|
|
} // end namespace llvm
|
|
|
|
#endif // LLVM_TRANSFORMS_UTILS_BYPASSSLOWDIVISION_H
|