1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[NFC] Remove waymarking because it improves performances

Summary:
This patch remove waymarking and replaces it with storing a pointer to the User in the Use.
here are the results on the measurements for the CTMark tests of the test suite.
```
Metric: instructions_count

Program                                                      baseline      patched       diff
 test-suite :: CTMark/ClamAV/clamscan.test                    72557942065   71733653521  -1.1%
 test-suite :: CTMark/sqlite3/sqlite3.test                    76281422939   75484840636  -1.0%
 test-suite :: CTMark/consumer-typeset/consumer-typeset.test  51364676366   50862185614  -1.0%
 test-suite :: CTMark/SPASS/SPASS.test                        60476106505   59908437767  -0.9%
 test-suite :: CTMark/tramp3d-v4/tramp3d-v4.test              112578442329  111725050856 -0.8%
 test-suite :: CTMark/mafft/pairlocalalign.test               50846133013   50473644539  -0.7%
 test-suite :: CTMark/kimwitu++/kc.test                       54692641250   54349070299  -0.6%
 test-suite :: CTMark/7zip/7zip-benchmark.test                182216614747  181216091230 -0.5%
 test-suite :: CTMark/Bullet/bullet.test                      123459210616  122905866767 -0.4%
 Geomean difference                                                                      -0.8%

Metric: peak_memory_use

Program                                                      baseline  patched   diff
 test-suite :: CTMark/tramp3d-v4/tramp3d-v4.test              326864    338524    3.6%
 test-suite :: CTMark/sqlite3/sqlite3.test                    216412    221240    2.2%
 test-suite :: CTMark/7zip/7zip-benchmark.test                11808284  12022604  1.8%
 test-suite :: CTMark/Bullet/bullet.test                      6831752   6945988   1.7%
 test-suite :: CTMark/SPASS/SPASS.test                        2682552   2721820   1.5%
 test-suite :: CTMark/ClamAV/clamscan.test                    5037256   5107936   1.4%
 test-suite :: CTMark/consumer-typeset/consumer-typeset.test  2752728   2790768   1.4%
 test-suite :: CTMark/mafft/pairlocalalign.test               1517676   1537244   1.3%
 test-suite :: CTMark/kimwitu++/kc.test                       1090748   1103448   1.2%
 Geomean difference                                                               1.8%

Metric: compile_time

Program                                                      baseline patched diff
 test-suite :: CTMark/consumer-typeset/consumer-typeset.test  14.71    14.38  -2.2%
 test-suite :: CTMark/sqlite3/sqlite3.test                    23.18    22.73  -2.0%
 test-suite :: CTMark/7zip/7zip-benchmark.test                57.96    56.99  -1.7%
 test-suite :: CTMark/ClamAV/clamscan.test                    20.75    20.49  -1.2%
 test-suite :: CTMark/kimwitu++/kc.test                       18.35    18.15  -1.1%
 test-suite :: CTMark/SPASS/SPASS.test                        18.72    18.57  -0.8%
 test-suite :: CTMark/mafft/pairlocalalign.test               14.09    14.00  -0.6%
 test-suite :: CTMark/Bullet/bullet.test                      37.38    37.19  -0.5%
 test-suite :: CTMark/tramp3d-v4/tramp3d-v4.test              33.81    33.76  -0.2%
 Geomean difference                                                           -1.1%
```

i believe that it is worth trading +1.8% peak memory use for -1.1% compile time.
also this patch removes waymarking which simplifies the Use and User classes.

Reviewers: nikic, lattner

Reviewed By: lattner

Subscribers: russell.gallop, foad, ggreif, rriddle, ekatz, fhahn, lebedev.ri, mgorny, hiraditya, george.burgess.iv, asbirlea, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D77144
This commit is contained in:
Tyker 2020-04-17 10:53:01 +02:00
parent f21a7a86a2
commit 711b3d653c
9 changed files with 21 additions and 344 deletions

View File

@ -3187,140 +3187,6 @@ memory layouts:
*(In the above figures* '``P``' *stands for the* ``Use**`` *that is stored in *(In the above figures* '``P``' *stands for the* ``Use**`` *that is stored in
each* ``Use`` *object in the member* ``Use::Prev`` *)* each* ``Use`` *object in the member* ``Use::Prev`` *)*
.. _Waymarking:
The waymarking algorithm
^^^^^^^^^^^^^^^^^^^^^^^^
Since the ``Use`` objects are deprived of the direct (back)pointer to their
``User`` objects, there must be a fast and exact method to recover it. This is
accomplished by the following scheme:
A bit-encoding in the 2 LSBits (least significant bits) of the ``Use::Prev``
allows to find the start of the ``User`` object:
* ``00`` --- binary digit 0
* ``01`` --- binary digit 1
* ``10`` --- stop and calculate (``s``)
* ``11`` --- full stop (``S``)
Given a ``Use*``, all we have to do is to walk till we get a stop and we either
have a ``User`` immediately behind or we have to walk to the next stop picking
up digits and calculating the offset:
.. code-block:: none
.---.---.---.---.---.---.---.---.---.---.---.---.---.---.---.---.----------------
| 1 | s | 1 | 0 | 1 | 0 | s | 1 | 1 | 0 | s | 1 | 1 | s | 1 | S | User (or User*)
'---'---'---'---'---'---'---'---'---'---'---'---'---'---'---'---'----------------
|+15 |+10 |+6 |+3 |+1
| | | | | __>
| | | | __________>
| | | ______________________>
| | ______________________________________>
| __________________________________________________________>
Only the significant number of bits need to be stored between the stops, so that
the *worst case is 20 memory accesses* when there are 1000 ``Use`` objects
associated with a ``User``.
.. _ReferenceImpl:
Reference implementation
^^^^^^^^^^^^^^^^^^^^^^^^
The following literate Haskell fragment demonstrates the concept:
.. code-block:: haskell
> import Test.QuickCheck
>
> digits :: Int -> [Char] -> [Char]
> digits 0 acc = '0' : acc
> digits 1 acc = '1' : acc
> digits n acc = digits (n `div` 2) $ digits (n `mod` 2) acc
>
> dist :: Int -> [Char] -> [Char]
> dist 0 [] = ['S']
> dist 0 acc = acc
> dist 1 acc = let r = dist 0 acc in 's' : digits (length r) r
> dist n acc = dist (n - 1) $ dist 1 acc
>
> takeLast n ss = reverse $ take n $ reverse ss
>
> test = takeLast 40 $ dist 20 []
>
Printing <test> gives: ``"1s100000s11010s10100s1111s1010s110s11s1S"``
The reverse algorithm computes the length of the string just by examining a
certain prefix:
.. code-block:: haskell
> pref :: [Char] -> Int
> pref "S" = 1
> pref ('s':'1':rest) = decode 2 1 rest
> pref (_:rest) = 1 + pref rest
>
> decode walk acc ('0':rest) = decode (walk + 1) (acc * 2) rest
> decode walk acc ('1':rest) = decode (walk + 1) (acc * 2 + 1) rest
> decode walk acc _ = walk + acc
>
Now, as expected, printing <pref test> gives ``40``.
We can *quickCheck* this with following property:
.. code-block:: haskell
> testcase = dist 2000 []
> testcaseLength = length testcase
>
> identityProp n = n > 0 && n <= testcaseLength ==> length arr == pref arr
> where arr = takeLast n testcase
>
As expected <quickCheck identityProp> gives:
::
*Main> quickCheck identityProp
OK, passed 100 tests.
Let's be a bit more exhaustive:
.. code-block:: haskell
>
> deepCheck p = check (defaultConfig { configMaxTest = 500 }) p
>
And here is the result of <deepCheck identityProp>:
::
*Main> deepCheck identityProp
OK, passed 500 tests.
.. _Tagging:
Tagging considerations
^^^^^^^^^^^^^^^^^^^^^^
To maintain the invariant that the 2 LSBits of each ``Use**`` in ``Use`` never
change after being set up, setters of ``Use::Prev`` must re-tag the new
``Use**`` on every modification. Accordingly getters must strip the tag bits.
For layout b) instead of the ``User`` we find a pointer (``User*`` with LSBit
set). Following this pointer brings us to the ``User``. A portable trick
ensures that the first bytes of ``User`` (if interpreted as a pointer) never has
the LSBit set. (Portability is relying on the fact that all known compilers
place the ``vptr`` in the first word of the instances.)
.. _polymorphism: .. _polymorphism:
Designing Type Hierarchies and Polymorphic Interfaces Designing Type Hierarchies and Polymorphic Interfaces

View File

@ -499,14 +499,11 @@ public:
using const_block_iterator = BasicBlock *const *; using const_block_iterator = BasicBlock *const *;
block_iterator block_begin() { block_iterator block_begin() {
auto *Ref = reinterpret_cast<Use::UserRef *>(op_begin() + ReservedSpace); return reinterpret_cast<block_iterator>(op_begin() + ReservedSpace);
return reinterpret_cast<block_iterator>(Ref + 1);
} }
const_block_iterator block_begin() const { const_block_iterator block_begin() const {
const auto *Ref = return reinterpret_cast<const_block_iterator>(op_begin() + ReservedSpace);
reinterpret_cast<const Use::UserRef *>(op_begin() + ReservedSpace);
return reinterpret_cast<const_block_iterator>(Ref + 1);
} }
block_iterator block_end() { return block_begin() + getNumOperands(); } block_iterator block_end() { return block_begin() + getNumOperands(); }

View File

@ -2549,15 +2549,11 @@ public:
using const_block_iterator = BasicBlock * const *; using const_block_iterator = BasicBlock * const *;
block_iterator block_begin() { block_iterator block_begin() {
Use::UserRef *ref = return reinterpret_cast<block_iterator>(op_begin() + ReservedSpace);
reinterpret_cast<Use::UserRef*>(op_begin() + ReservedSpace);
return reinterpret_cast<block_iterator>(ref + 1);
} }
const_block_iterator block_begin() const { const_block_iterator block_begin() const {
const Use::UserRef *ref = return reinterpret_cast<const_block_iterator>(op_begin() + ReservedSpace);
reinterpret_cast<const Use::UserRef*>(op_begin() + ReservedSpace);
return reinterpret_cast<const_block_iterator>(ref + 1);
} }
block_iterator block_end() { block_iterator block_end() {

View File

@ -41,17 +41,6 @@ class Value;
/// all of the uses for a particular value definition. It also supports jumping /// all of the uses for a particular value definition. It also supports jumping
/// directly to the used value when we arrive from the User's operands, and /// directly to the used value when we arrive from the User's operands, and
/// jumping directly to the User when we arrive from the Value's uses. /// jumping directly to the User when we arrive from the Value's uses.
///
/// The pointer to the used Value is explicit, and the pointer to the User is
/// implicit. The implicit pointer is found via a waymarking algorithm
/// described in the programmer's manual:
///
/// http://www.llvm.org/docs/ProgrammersManual.html#the-waymarking-algorithm
///
/// This is essentially the single most memory intensive object in LLVM because
/// of the number of uses in the system. At the same time, the constant time
/// operations it allows are essential to many optimizations having reasonable
/// time complexity.
class Use { class Use {
public: public:
Use(const Use &U) = delete; Use(const Use &U) = delete;
@ -60,34 +49,6 @@ public:
/// that also works with less standard-compliant compilers /// that also works with less standard-compliant compilers
void swap(Use &RHS); void swap(Use &RHS);
/// Pointer traits for the UserRef PointerIntPair. This ensures we always
/// use the LSB regardless of pointer alignment on different targets.
struct UserRefPointerTraits {
static inline void *getAsVoidPointer(User *P) { return P; }
static inline User *getFromVoidPointer(void *P) {
return (User *)P;
}
static constexpr int NumLowBitsAvailable = 1;
};
// A type for the word following an array of hung-off Uses in memory, which is
// a pointer back to their User with the bottom bit set.
using UserRef = PointerIntPair<User *, 1, unsigned, UserRefPointerTraits>;
/// Pointer traits for the Prev PointerIntPair. This ensures we always use
/// the two LSBs regardless of pointer alignment on different targets.
struct PrevPointerTraits {
static inline void *getAsVoidPointer(Use **P) { return P; }
static inline Use **getFromVoidPointer(void *P) {
return (Use **)P;
}
static constexpr int NumLowBitsAvailable = 2;
};
private: private:
/// Destructor - Only for zap() /// Destructor - Only for zap()
~Use() { ~Use() {
@ -95,13 +56,12 @@ private:
removeFromList(); removeFromList();
} }
enum PrevPtrTag { zeroDigitTag, oneDigitTag, stopTag, fullStopTag };
/// Constructor /// Constructor
Use(PrevPtrTag tag) { Prev.setInt(tag); } Use(User *Parent) : Parent(Parent) {}
public: public:
friend class Value; friend class Value;
friend class User;
operator Value *() const { return Val; } operator Value *() const { return Val; }
Value *get() const { return Val; } Value *get() const { return Val; }
@ -110,7 +70,7 @@ public:
/// ///
/// For an instruction operand, for example, this will return the /// For an instruction operand, for example, this will return the
/// instruction. /// instruction.
User *getUser() const LLVM_READONLY; User *getUser() const { return Parent; };
inline void set(Value *Val); inline void set(Value *Val);
@ -125,24 +85,18 @@ public:
/// Return the operand # of this use in its User. /// Return the operand # of this use in its User.
unsigned getOperandNo() const; unsigned getOperandNo() const;
/// Initializes the waymarking tags on an array of Uses.
///
/// This sets up the array of Uses such that getUser() can find the User from
/// any of those Uses.
static Use *initTags(Use *Start, Use *Stop);
/// Destroys Use operands when the number of operands of /// Destroys Use operands when the number of operands of
/// a User changes. /// a User changes.
static void zap(Use *Start, const Use *Stop, bool del = false); static void zap(Use *Start, const Use *Stop, bool del = false);
private: private:
const Use *getImpliedUser() const LLVM_READONLY;
Value *Val = nullptr; Value *Val = nullptr;
Use *Next = nullptr; Use *Next = nullptr;
PointerIntPair<Use **, 2, PrevPtrTag, PrevPointerTraits> Prev; Use **Prev = nullptr;
User *Parent = nullptr;
void setPrev(Use **NewPrev) { Prev.setPointer(NewPrev); } void setPrev(Use **NewPrev) { Prev = NewPrev; }
void addToList(Use **List) { void addToList(Use **List) {
Next = *List; Next = *List;
@ -153,7 +107,7 @@ private:
} }
void removeFromList() { void removeFromList() {
Use **StrippedPrev = Prev.getPointer(); Use **StrippedPrev = Prev;
*StrippedPrev = Next; *StrippedPrev = Next;
if (Next) if (Next)
Next->setPrev(StrippedPrev); Next->setPrev(StrippedPrev);

View File

@ -72,8 +72,6 @@ using ValueName = StringMapEntry<Value *>;
/// objects that watch it and listen to RAUW and Destroy events. See /// objects that watch it and listen to RAUW and Destroy events. See
/// llvm/IR/ValueHandle.h for details. /// llvm/IR/ValueHandle.h for details.
class Value { class Value {
// The least-significant bit of the first word of Value *must* be zero:
// http://www.llvm.org/docs/ProgrammersManual.html#the-waymarking-algorithm
Type *VTy; Type *VTy;
Use *UseList; Use *UseList;

View File

@ -37,52 +37,10 @@ void Use::swap(Use &RHS) {
} }
} }
User *Use::getUser() const {
const Use *End = getImpliedUser();
const UserRef *ref = reinterpret_cast<const UserRef *>(End);
return ref->getInt() ? ref->getPointer()
: reinterpret_cast<User *>(const_cast<Use *>(End));
}
unsigned Use::getOperandNo() const { unsigned Use::getOperandNo() const {
return this - getUser()->op_begin(); return this - getUser()->op_begin();
} }
// Sets up the waymarking algorithm's tags for a series of Uses. See the
// algorithm details here:
//
// http://www.llvm.org/docs/ProgrammersManual.html#the-waymarking-algorithm
//
Use *Use::initTags(Use *const Start, Use *Stop) {
ptrdiff_t Done = 0;
while (Done < 20) {
if (Start == Stop--)
return Start;
static const PrevPtrTag tags[20] = {
fullStopTag, oneDigitTag, stopTag, oneDigitTag, oneDigitTag,
stopTag, zeroDigitTag, oneDigitTag, oneDigitTag, stopTag,
zeroDigitTag, oneDigitTag, zeroDigitTag, oneDigitTag, stopTag,
oneDigitTag, oneDigitTag, oneDigitTag, oneDigitTag, stopTag};
new (Stop) Use(tags[Done++]);
}
ptrdiff_t Count = Done;
while (Start != Stop) {
--Stop;
if (!Count) {
new (Stop) Use(stopTag);
++Done;
Count = Done;
} else {
new (Stop) Use(PrevPtrTag(Count & 1));
Count >>= 1;
++Done;
}
}
return Start;
}
void Use::zap(Use *Start, const Use *Stop, bool del) { void Use::zap(Use *Start, const Use *Stop, bool del) {
while (Start != Stop) while (Start != Stop)
(--Stop)->~Use(); (--Stop)->~Use();
@ -90,37 +48,4 @@ void Use::zap(Use *Start, const Use *Stop, bool del) {
::operator delete(Start); ::operator delete(Start);
} }
const Use *Use::getImpliedUser() const {
const Use *Current = this;
while (true) {
unsigned Tag = (Current++)->Prev.getInt();
switch (Tag) {
case zeroDigitTag:
case oneDigitTag:
continue;
case stopTag: {
++Current;
ptrdiff_t Offset = 1;
while (true) {
unsigned Tag = Current->Prev.getInt();
switch (Tag) {
case zeroDigitTag:
case oneDigitTag:
++Current;
Offset = (Offset << 1) + Tag;
continue;
default:
return Current + Offset;
}
}
}
case fullStopTag:
return Current;
}
}
}
} // End llvm namespace } // End llvm namespace

View File

@ -40,20 +40,18 @@ void User::replaceUsesOfWith(Value *From, Value *To) {
void User::allocHungoffUses(unsigned N, bool IsPhi) { void User::allocHungoffUses(unsigned N, bool IsPhi) {
assert(HasHungOffUses && "alloc must have hung off uses"); assert(HasHungOffUses && "alloc must have hung off uses");
static_assert(alignof(Use) >= alignof(Use::UserRef), static_assert(alignof(Use) >= alignof(BasicBlock *),
"Alignment is insufficient for 'hung-off-uses' pieces");
static_assert(alignof(Use::UserRef) >= alignof(BasicBlock *),
"Alignment is insufficient for 'hung-off-uses' pieces"); "Alignment is insufficient for 'hung-off-uses' pieces");
// Allocate the array of Uses, followed by a pointer (with bottom bit set) to // Allocate the array of Uses
// the User. size_t size = N * sizeof(Use);
size_t size = N * sizeof(Use) + sizeof(Use::UserRef);
if (IsPhi) if (IsPhi)
size += N * sizeof(BasicBlock *); size += N * sizeof(BasicBlock *);
Use *Begin = static_cast<Use*>(::operator new(size)); Use *Begin = static_cast<Use*>(::operator new(size));
Use *End = Begin + N; Use *End = Begin + N;
(void) new(End) Use::UserRef(const_cast<User*>(this), 1); setOperandList(Begin);
setOperandList(Use::initTags(Begin, End)); for (; Begin != End; Begin++)
new (Begin) Use(this);
} }
void User::growHungoffUses(unsigned NewNumUses, bool IsPhi) { void User::growHungoffUses(unsigned NewNumUses, bool IsPhi) {
@ -74,10 +72,8 @@ void User::growHungoffUses(unsigned NewNumUses, bool IsPhi) {
// If this is a Phi, then we need to copy the BB pointers too. // If this is a Phi, then we need to copy the BB pointers too.
if (IsPhi) { if (IsPhi) {
auto *OldPtr = auto *OldPtr = reinterpret_cast<char *>(OldOps + OldNumUses);
reinterpret_cast<char *>(OldOps + OldNumUses) + sizeof(Use::UserRef); auto *NewPtr = reinterpret_cast<char *>(NewOps + NewNumUses);
auto *NewPtr =
reinterpret_cast<char *>(NewOps + NewNumUses) + sizeof(Use::UserRef);
std::copy(OldPtr, OldPtr + (OldNumUses * sizeof(BasicBlock *)), NewPtr); std::copy(OldPtr, OldPtr + (OldNumUses * sizeof(BasicBlock *)), NewPtr);
} }
Use::zap(OldOps, OldOps + OldNumUses, true); Use::zap(OldOps, OldOps + OldNumUses, true);
@ -135,7 +131,8 @@ void *User::allocateFixedOperandUser(size_t Size, unsigned Us,
Obj->NumUserOperands = Us; Obj->NumUserOperands = Us;
Obj->HasHungOffUses = false; Obj->HasHungOffUses = false;
Obj->HasDescriptor = DescBytes != 0; Obj->HasDescriptor = DescBytes != 0;
Use::initTags(Start, End); for (; Start != End; Start++)
new (Start) Use(Obj);
if (DescBytes != 0) { if (DescBytes != 0) {
auto *DescInfo = reinterpret_cast<DescriptorInfo *>(Storage + DescBytes); auto *DescInfo = reinterpret_cast<DescriptorInfo *>(Storage + DescBytes);

View File

@ -41,7 +41,6 @@ add_llvm_unittest(IRTests
VectorTypesTest.cpp VectorTypesTest.cpp
VerifierTest.cpp VerifierTest.cpp
VPIntrinsicTest.cpp VPIntrinsicTest.cpp
WaymarkTest.cpp
) )
target_link_libraries(IRTests PRIVATE LLVMTestingSupport) target_link_libraries(IRTests PRIVATE LLVMTestingSupport)

View File

@ -1,55 +0,0 @@
//===- llvm/unittest/IR/WaymarkTest.cpp - getUser() unit tests ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// we perform white-box tests
//
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "gtest/gtest.h"
#include <algorithm>
namespace llvm {
namespace {
TEST(WaymarkTest, NativeArray) {
LLVMContext Context;
static uint8_t tail[22] = "s02s33s30y2y0s1x0syxS";
Value * values[22];
std::transform(tail, tail + 22, values, [&](char c) {
return ConstantInt::get(Type::getInt8Ty(Context), c);
});
FunctionType *FT = FunctionType::get(Type::getVoidTy(Context), true);
std::unique_ptr<Function> F(
Function::Create(FT, GlobalValue::ExternalLinkage));
const CallInst *A = CallInst::Create(F.get(), makeArrayRef(values));
ASSERT_NE(A, (const CallInst*)nullptr);
ASSERT_EQ(1U + 22, A->getNumOperands());
const Use *U = &A->getOperandUse(0);
const Use *Ue = &A->getOperandUse(22);
for (; U != Ue; ++U)
{
EXPECT_EQ(A, U->getUser());
}
delete A;
}
TEST(WaymarkTest, TwoBit) {
Use* many = (Use*)calloc(sizeof(Use), 8212 + 1);
ASSERT_TRUE(many);
Use::initTags(many, many + 8212);
for (Use *U = many, *Ue = many + 8212 - 1; U != Ue; ++U)
{
EXPECT_EQ(reinterpret_cast<User *>(Ue + 1), U->getUser());
}
free(many);
}
} // end anonymous namespace
} // end namespace llvm