mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
[regalloc] Ensure Query::collectInterferingVRegs is called before interval iteration
The main part of the patch is the change in RegAllocGreedy.cpp: Q.collectInterferingVRegs() needs to be called before iterating the interfering live ranges. The rest of the patch ensures that is the case: instead of clearing the query's InterferingVRegs field, we invalidate it. The clearing happens when the live reg matrix is invalidated (the existing triggering mechanism). Without the change in RegAllocGreedy.cpp, the compiler crashes (ICEs). This patch should make it more easily discoverable by developers that collectInterferingVRegs needs to be called before iterating. I will follow up with a subsequent patch to improve the usability and maintainability of Query. Differential Revision: https://reviews.llvm.org/D98232
This commit is contained in:
parent
388899404f
commit
9b55a6ca1b
@ -114,30 +114,30 @@ public:
|
|||||||
const LiveRange *LR = nullptr;
|
const LiveRange *LR = nullptr;
|
||||||
LiveRange::const_iterator LRI; ///< current position in LR
|
LiveRange::const_iterator LRI; ///< current position in LR
|
||||||
ConstSegmentIter LiveUnionI; ///< current position in LiveUnion
|
ConstSegmentIter LiveUnionI; ///< current position in LiveUnion
|
||||||
SmallVector<LiveInterval*,4> InterferingVRegs;
|
Optional<SmallVector<LiveInterval *, 4>> InterferingVRegs;
|
||||||
bool CheckedFirstInterference = false;
|
bool CheckedFirstInterference = false;
|
||||||
bool SeenAllInterferences = false;
|
bool SeenAllInterferences = false;
|
||||||
unsigned Tag = 0;
|
unsigned Tag = 0;
|
||||||
unsigned UserTag = 0;
|
unsigned UserTag = 0;
|
||||||
|
|
||||||
|
public:
|
||||||
|
Query() = default;
|
||||||
|
Query(const LiveRange &LR, const LiveIntervalUnion &LIU)
|
||||||
|
: LiveUnion(&LIU), LR(&LR) {}
|
||||||
|
Query(const Query &) = delete;
|
||||||
|
Query &operator=(const Query &) = delete;
|
||||||
|
|
||||||
void reset(unsigned NewUserTag, const LiveRange &NewLR,
|
void reset(unsigned NewUserTag, const LiveRange &NewLR,
|
||||||
const LiveIntervalUnion &NewLiveUnion) {
|
const LiveIntervalUnion &NewLiveUnion) {
|
||||||
LiveUnion = &NewLiveUnion;
|
LiveUnion = &NewLiveUnion;
|
||||||
LR = &NewLR;
|
LR = &NewLR;
|
||||||
InterferingVRegs.clear();
|
InterferingVRegs = None;
|
||||||
CheckedFirstInterference = false;
|
CheckedFirstInterference = false;
|
||||||
SeenAllInterferences = false;
|
SeenAllInterferences = false;
|
||||||
Tag = NewLiveUnion.getTag();
|
Tag = NewLiveUnion.getTag();
|
||||||
UserTag = NewUserTag;
|
UserTag = NewUserTag;
|
||||||
}
|
}
|
||||||
|
|
||||||
public:
|
|
||||||
Query() = default;
|
|
||||||
Query(const LiveRange &LR, const LiveIntervalUnion &LIU):
|
|
||||||
LiveUnion(&LIU), LR(&LR) {}
|
|
||||||
Query(const Query &) = delete;
|
|
||||||
Query &operator=(const Query &) = delete;
|
|
||||||
|
|
||||||
void init(unsigned NewUserTag, const LiveRange &NewLR,
|
void init(unsigned NewUserTag, const LiveRange &NewLR,
|
||||||
const LiveIntervalUnion &NewLiveUnion) {
|
const LiveIntervalUnion &NewLiveUnion) {
|
||||||
if (UserTag == NewUserTag && LR == &NewLR && LiveUnion == &NewLiveUnion &&
|
if (UserTag == NewUserTag && LR == &NewLR && LiveUnion == &NewLiveUnion &&
|
||||||
@ -164,7 +164,7 @@ public:
|
|||||||
|
|
||||||
// Vector generated by collectInterferingVRegs.
|
// Vector generated by collectInterferingVRegs.
|
||||||
const SmallVectorImpl<LiveInterval*> &interferingVRegs() const {
|
const SmallVectorImpl<LiveInterval*> &interferingVRegs() const {
|
||||||
return InterferingVRegs;
|
return *InterferingVRegs;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -112,7 +112,7 @@ LiveInterval *LiveIntervalUnion::getOneVReg() const {
|
|||||||
// Scan the vector of interfering virtual registers in this union. Assume it's
|
// Scan the vector of interfering virtual registers in this union. Assume it's
|
||||||
// quite small.
|
// quite small.
|
||||||
bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
|
bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
|
||||||
return is_contained(InterferingVRegs, VirtReg);
|
return is_contained(*InterferingVRegs, VirtReg);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Collect virtual registers in this union that interfere with this
|
// Collect virtual registers in this union that interfere with this
|
||||||
@ -126,9 +126,12 @@ bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
|
|||||||
//
|
//
|
||||||
unsigned LiveIntervalUnion::Query::
|
unsigned LiveIntervalUnion::Query::
|
||||||
collectInterferingVRegs(unsigned MaxInterferingRegs) {
|
collectInterferingVRegs(unsigned MaxInterferingRegs) {
|
||||||
|
if (!InterferingVRegs)
|
||||||
|
InterferingVRegs.emplace();
|
||||||
|
|
||||||
// Fast path return if we already have the desired information.
|
// Fast path return if we already have the desired information.
|
||||||
if (SeenAllInterferences || InterferingVRegs.size() >= MaxInterferingRegs)
|
if (SeenAllInterferences || InterferingVRegs->size() >= MaxInterferingRegs)
|
||||||
return InterferingVRegs.size();
|
return InterferingVRegs->size();
|
||||||
|
|
||||||
// Set up iterators on the first call.
|
// Set up iterators on the first call.
|
||||||
if (!CheckedFirstInterference) {
|
if (!CheckedFirstInterference) {
|
||||||
@ -157,14 +160,14 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) {
|
|||||||
LiveInterval *VReg = LiveUnionI.value();
|
LiveInterval *VReg = LiveUnionI.value();
|
||||||
if (VReg != RecentReg && !isSeenInterference(VReg)) {
|
if (VReg != RecentReg && !isSeenInterference(VReg)) {
|
||||||
RecentReg = VReg;
|
RecentReg = VReg;
|
||||||
InterferingVRegs.push_back(VReg);
|
InterferingVRegs->push_back(VReg);
|
||||||
if (InterferingVRegs.size() >= MaxInterferingRegs)
|
if (InterferingVRegs->size() >= MaxInterferingRegs)
|
||||||
return InterferingVRegs.size();
|
return InterferingVRegs->size();
|
||||||
}
|
}
|
||||||
// This LiveUnion segment is no longer interesting.
|
// This LiveUnion segment is no longer interesting.
|
||||||
if (!(++LiveUnionI).valid()) {
|
if (!(++LiveUnionI).valid()) {
|
||||||
SeenAllInterferences = true;
|
SeenAllInterferences = true;
|
||||||
return InterferingVRegs.size();
|
return InterferingVRegs->size();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -185,7 +188,7 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) {
|
|||||||
LiveUnionI.advanceTo(LRI->start);
|
LiveUnionI.advanceTo(LRI->start);
|
||||||
}
|
}
|
||||||
SeenAllInterferences = true;
|
SeenAllInterferences = true;
|
||||||
return InterferingVRegs.size();
|
return InterferingVRegs->size();
|
||||||
}
|
}
|
||||||
|
|
||||||
void LiveIntervalUnion::Array::init(LiveIntervalUnion::Allocator &Alloc,
|
void LiveIntervalUnion::Array::init(LiveIntervalUnion::Allocator &Alloc,
|
||||||
|
@ -216,7 +216,21 @@ bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End,
|
|||||||
|
|
||||||
// Check for interference with that segment
|
// Check for interference with that segment
|
||||||
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
|
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
|
||||||
if (query(LR, *Units).checkInterference())
|
// LR is stack-allocated. LiveRegMatrix caches queries by a key that
|
||||||
|
// includes the address of the live range. If (for the same reg unit) this
|
||||||
|
// checkInterference overload is called twice, without any other query()
|
||||||
|
// calls in between (on heap-allocated LiveRanges) - which would invalidate
|
||||||
|
// the cached query - the LR address seen the second time may well be the
|
||||||
|
// same as that seen the first time, while the Start/End/valno may not - yet
|
||||||
|
// the same cached result would be fetched. To avoid that, we don't cache
|
||||||
|
// this query.
|
||||||
|
//
|
||||||
|
// FIXME: the usability of the Query API needs to be improved to avoid
|
||||||
|
// subtle bugs due to query identity. Avoiding caching, for example, would
|
||||||
|
// greatly simplify things.
|
||||||
|
LiveIntervalUnion::Query Q;
|
||||||
|
Q.reset(UserTag, LR, Matrix[*Units]);
|
||||||
|
if (Q.checkInterference())
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
@ -471,12 +471,13 @@ private:
|
|||||||
bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool) const;
|
bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool) const;
|
||||||
bool canEvictInterference(LiveInterval &, MCRegister, bool, EvictionCost &,
|
bool canEvictInterference(LiveInterval &, MCRegister, bool, EvictionCost &,
|
||||||
const SmallVirtRegSet &) const;
|
const SmallVirtRegSet &) const;
|
||||||
bool canEvictInterferenceInRange(LiveInterval &VirtReg, MCRegister PhysReg,
|
bool canEvictInterferenceInRange(const LiveInterval &VirtReg,
|
||||||
SlotIndex Start, SlotIndex End,
|
MCRegister PhysReg, SlotIndex Start,
|
||||||
EvictionCost &MaxCost) const;
|
SlotIndex End, EvictionCost &MaxCost) const;
|
||||||
MCRegister getCheapestEvicteeWeight(const AllocationOrder &Order,
|
MCRegister getCheapestEvicteeWeight(const AllocationOrder &Order,
|
||||||
LiveInterval &VirtReg, SlotIndex Start,
|
const LiveInterval &VirtReg,
|
||||||
SlotIndex End, float *BestEvictWeight);
|
SlotIndex Start, SlotIndex End,
|
||||||
|
float *BestEvictWeight) const;
|
||||||
void evictInterference(LiveInterval &, MCRegister,
|
void evictInterference(LiveInterval &, MCRegister,
|
||||||
SmallVectorImpl<Register> &);
|
SmallVectorImpl<Register> &);
|
||||||
bool mayRecolorAllInterferences(MCRegister PhysReg, LiveInterval &VirtReg,
|
bool mayRecolorAllInterferences(MCRegister PhysReg, LiveInterval &VirtReg,
|
||||||
@ -979,7 +980,7 @@ bool RAGreedy::canEvictInterference(
|
|||||||
/// \param MaxCost Only look for cheaper candidates and update with new cost
|
/// \param MaxCost Only look for cheaper candidates and update with new cost
|
||||||
/// when returning true.
|
/// when returning true.
|
||||||
/// \return True when interference can be evicted cheaper than MaxCost.
|
/// \return True when interference can be evicted cheaper than MaxCost.
|
||||||
bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg,
|
bool RAGreedy::canEvictInterferenceInRange(const LiveInterval &VirtReg,
|
||||||
MCRegister PhysReg, SlotIndex Start,
|
MCRegister PhysReg, SlotIndex Start,
|
||||||
SlotIndex End,
|
SlotIndex End,
|
||||||
EvictionCost &MaxCost) const {
|
EvictionCost &MaxCost) const {
|
||||||
@ -987,6 +988,7 @@ bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg,
|
|||||||
|
|
||||||
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
|
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
|
||||||
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
|
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
|
||||||
|
Q.collectInterferingVRegs();
|
||||||
|
|
||||||
// Check if any interfering live range is heavier than MaxWeight.
|
// Check if any interfering live range is heavier than MaxWeight.
|
||||||
for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
|
for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
|
||||||
@ -1031,9 +1033,9 @@ bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg,
|
|||||||
/// \return The PhysReg which is the best candidate for eviction and the
|
/// \return The PhysReg which is the best candidate for eviction and the
|
||||||
/// eviction cost in BestEvictweight
|
/// eviction cost in BestEvictweight
|
||||||
MCRegister RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order,
|
MCRegister RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order,
|
||||||
LiveInterval &VirtReg,
|
const LiveInterval &VirtReg,
|
||||||
SlotIndex Start, SlotIndex End,
|
SlotIndex Start, SlotIndex End,
|
||||||
float *BestEvictweight) {
|
float *BestEvictweight) const {
|
||||||
EvictionCost BestEvictCost;
|
EvictionCost BestEvictCost;
|
||||||
BestEvictCost.setMax();
|
BestEvictCost.setMax();
|
||||||
BestEvictCost.MaxWeight = VirtReg.weight();
|
BestEvictCost.MaxWeight = VirtReg.weight();
|
||||||
@ -1556,25 +1558,9 @@ bool RAGreedy::splitCanCauseLocalSpill(unsigned VirtRegToSplit,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if the local interval will evict a cheaper interval.
|
// The local interval is not able to find non interferencing assignment
|
||||||
float CheapestEvictWeight = 0;
|
// and not able to evict a less worthy interval, therfore, it can cause a
|
||||||
MCRegister FutureEvictedPhysReg = getCheapestEvicteeWeight(
|
// spill.
|
||||||
Order, LIS->getInterval(VirtRegToSplit), Cand.Intf.first(),
|
|
||||||
Cand.Intf.last(), &CheapestEvictWeight);
|
|
||||||
|
|
||||||
// Have we found an interval that can be evicted?
|
|
||||||
if (FutureEvictedPhysReg) {
|
|
||||||
float splitArtifactWeight =
|
|
||||||
VRAI->futureWeight(LIS->getInterval(VirtRegToSplit),
|
|
||||||
Cand.Intf.first().getPrevIndex(), Cand.Intf.last());
|
|
||||||
// Will the weight of the local interval be higher than the cheapest evictee
|
|
||||||
// weight? If so it will evict it and will not cause a spill.
|
|
||||||
if (splitArtifactWeight >= 0 && splitArtifactWeight > CheapestEvictWeight)
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// The local interval is not able to find non interferencing assignment and
|
|
||||||
// not able to evict a less worthy interval, therfore, it can cause a spill.
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -557,7 +557,7 @@ public:
|
|||||||
|
|
||||||
bool enableEarlyIfConversion() const override;
|
bool enableEarlyIfConversion() const override;
|
||||||
|
|
||||||
bool enableAdvancedRASplitCost() const override { return true; }
|
bool enableAdvancedRASplitCost() const override { return false; }
|
||||||
|
|
||||||
std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;
|
std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;
|
||||||
|
|
||||||
|
@ -941,7 +941,7 @@ public:
|
|||||||
return TargetSubtargetInfo::ANTIDEP_CRITICAL;
|
return TargetSubtargetInfo::ANTIDEP_CRITICAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool enableAdvancedRASplitCost() const override { return true; }
|
bool enableAdvancedRASplitCost() const override { return false; }
|
||||||
};
|
};
|
||||||
|
|
||||||
} // end namespace llvm
|
} // end namespace llvm
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
; RUN: llc -mtriple=aarch64-arm-none-eabi < %s | FileCheck %s
|
; RUN: llc -consider-local-interval-cost -mtriple=aarch64-arm-none-eabi < %s | FileCheck %s
|
||||||
|
|
||||||
@A = external dso_local local_unnamed_addr global [8 x [8 x i64]], align 8
|
@A = external dso_local local_unnamed_addr global [8 x [8 x i64]], align 8
|
||||||
@B = external dso_local local_unnamed_addr global [8 x [8 x i64]], align 8
|
@B = external dso_local local_unnamed_addr global [8 x [8 x i64]], align 8
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
; RUN: llc < %s -march=x86 -regalloc=greedy -stop-after=greedy | FileCheck %s
|
; RUN: llc -consider-local-interval-cost < %s -march=x86 -regalloc=greedy -stop-after=greedy | FileCheck %s
|
||||||
; Make sure bad eviction sequence doesnt occur
|
; Make sure bad eviction sequence doesnt occur
|
||||||
|
|
||||||
; Fix for bugzilla 26810.
|
; Fix for bugzilla 26810.
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
; RUN: llc < %s -march=x86 -regalloc=greedy -stop-after=greedy | FileCheck %s
|
; RUN: llc -consider-local-interval-cost < %s -march=x86 -regalloc=greedy -stop-after=greedy | FileCheck %s
|
||||||
; Make sure bad eviction sequence doesnt occur
|
; Make sure bad eviction sequence doesnt occur
|
||||||
|
|
||||||
; Part of the fix for bugzilla 26810.
|
; Part of the fix for bugzilla 26810.
|
||||||
|
@ -162,9 +162,9 @@ define i64 @mul1(i64 %n, i64* nocapture %z, i64* nocapture %x, i64 %y) nounwind
|
|||||||
; X86-NOBMI-NEXT: movl (%esp), %edi # 4-byte Reload
|
; X86-NOBMI-NEXT: movl (%esp), %edi # 4-byte Reload
|
||||||
; X86-NOBMI-NEXT: adcl $0, %edi
|
; X86-NOBMI-NEXT: adcl $0, %edi
|
||||||
; X86-NOBMI-NEXT: movl %ebp, %esi
|
; X86-NOBMI-NEXT: movl %ebp, %esi
|
||||||
; X86-NOBMI-NEXT: xorl %ebx, %esi
|
; X86-NOBMI-NEXT: xorl {{[0-9]+}}(%esp), %esi
|
||||||
; X86-NOBMI-NEXT: movl %edi, (%esp) # 4-byte Spill
|
; X86-NOBMI-NEXT: movl %edi, (%esp) # 4-byte Spill
|
||||||
; X86-NOBMI-NEXT: xorl {{[0-9]+}}(%esp), %edi
|
; X86-NOBMI-NEXT: xorl %ebx, %edi
|
||||||
; X86-NOBMI-NEXT: orl %esi, %edi
|
; X86-NOBMI-NEXT: orl %esi, %edi
|
||||||
; X86-NOBMI-NEXT: jne .LBB1_2
|
; X86-NOBMI-NEXT: jne .LBB1_2
|
||||||
; X86-NOBMI-NEXT: .LBB1_3: # %for.end
|
; X86-NOBMI-NEXT: .LBB1_3: # %for.end
|
||||||
|
@ -390,25 +390,28 @@ define <1 x i64> @test3(<1 x i64>* %a, <1 x i64>* %b, i32 %count) nounwind {
|
|||||||
; X32-NEXT: pushl %ebx
|
; X32-NEXT: pushl %ebx
|
||||||
; X32-NEXT: pushl %edi
|
; X32-NEXT: pushl %edi
|
||||||
; X32-NEXT: pushl %esi
|
; X32-NEXT: pushl %esi
|
||||||
; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp)
|
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||||
|
; X32-NEXT: testl %ecx, %ecx
|
||||||
; X32-NEXT: je .LBB3_1
|
; X32-NEXT: je .LBB3_1
|
||||||
; X32-NEXT: # %bb.2: # %bb26.preheader
|
; X32-NEXT: # %bb.2: # %bb26.preheader
|
||||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
|
|
||||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
|
|
||||||
; X32-NEXT: xorl %ebx, %ebx
|
; X32-NEXT: xorl %ebx, %ebx
|
||||||
; X32-NEXT: xorl %eax, %eax
|
; X32-NEXT: xorl %eax, %eax
|
||||||
; X32-NEXT: xorl %edx, %edx
|
; X32-NEXT: xorl %edx, %edx
|
||||||
; X32-NEXT: .p2align 4, 0x90
|
; X32-NEXT: .p2align 4, 0x90
|
||||||
; X32-NEXT: .LBB3_3: # %bb26
|
; X32-NEXT: .LBB3_3: # %bb26
|
||||||
; X32-NEXT: # =>This Inner Loop Header: Depth=1
|
; X32-NEXT: # =>This Inner Loop Header: Depth=1
|
||||||
|
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||||
; X32-NEXT: movl (%edi,%ebx,8), %ebp
|
; X32-NEXT: movl (%edi,%ebx,8), %ebp
|
||||||
|
; X32-NEXT: movl %ecx, %esi
|
||||||
; X32-NEXT: movl 4(%edi,%ebx,8), %ecx
|
; X32-NEXT: movl 4(%edi,%ebx,8), %ecx
|
||||||
; X32-NEXT: addl (%esi,%ebx,8), %ebp
|
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||||
; X32-NEXT: adcl 4(%esi,%ebx,8), %ecx
|
; X32-NEXT: addl (%edi,%ebx,8), %ebp
|
||||||
|
; X32-NEXT: adcl 4(%edi,%ebx,8), %ecx
|
||||||
; X32-NEXT: addl %ebp, %eax
|
; X32-NEXT: addl %ebp, %eax
|
||||||
; X32-NEXT: adcl %ecx, %edx
|
; X32-NEXT: adcl %ecx, %edx
|
||||||
|
; X32-NEXT: movl %esi, %ecx
|
||||||
; X32-NEXT: incl %ebx
|
; X32-NEXT: incl %ebx
|
||||||
; X32-NEXT: cmpl {{[0-9]+}}(%esp), %ebx
|
; X32-NEXT: cmpl %esi, %ebx
|
||||||
; X32-NEXT: jb .LBB3_3
|
; X32-NEXT: jb .LBB3_3
|
||||||
; X32-NEXT: jmp .LBB3_4
|
; X32-NEXT: jmp .LBB3_4
|
||||||
; X32-NEXT: .LBB3_1:
|
; X32-NEXT: .LBB3_1:
|
||||||
|
@ -450,49 +450,51 @@ define void @bar(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind {
|
|||||||
; CHECK-NEXT: pushl %edi
|
; CHECK-NEXT: pushl %edi
|
||||||
; CHECK-NEXT: pushl %esi
|
; CHECK-NEXT: pushl %esi
|
||||||
; CHECK-NEXT: subl $28, %esp
|
; CHECK-NEXT: subl $28, %esp
|
||||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
|
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
|
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
|
||||||
|
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
|
; CHECK-NEXT: movl %ebp, %edx
|
||||||
; CHECK-NEXT: movl %ebp, %eax
|
; CHECK-NEXT: imull %eax, %edx
|
||||||
; CHECK-NEXT: imull %ecx, %eax
|
|
||||||
; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp)
|
; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp)
|
||||||
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
|
; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
|
||||||
; CHECK-NEXT: je LBB1_19
|
; CHECK-NEXT: je LBB1_19
|
||||||
; CHECK-NEXT: ## %bb.1: ## %bb10.preheader
|
; CHECK-NEXT: ## %bb.1: ## %bb10.preheader
|
||||||
; CHECK-NEXT: shrl $2, %eax
|
; CHECK-NEXT: movl %edx, %ecx
|
||||||
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
|
; CHECK-NEXT: shrl $2, %ecx
|
||||||
|
; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
|
||||||
; CHECK-NEXT: testl %ebp, %ebp
|
; CHECK-NEXT: testl %ebp, %ebp
|
||||||
|
; CHECK-NEXT: movl %eax, %edi
|
||||||
; CHECK-NEXT: je LBB1_12
|
; CHECK-NEXT: je LBB1_12
|
||||||
; CHECK-NEXT: ## %bb.2: ## %bb.nph9
|
; CHECK-NEXT: ## %bb.2: ## %bb.nph9
|
||||||
; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp)
|
; CHECK-NEXT: testl %eax, %eax
|
||||||
; CHECK-NEXT: je LBB1_12
|
; CHECK-NEXT: je LBB1_12
|
||||||
; CHECK-NEXT: ## %bb.3: ## %bb.nph9.split
|
; CHECK-NEXT: ## %bb.3: ## %bb.nph9.split
|
||||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; CHECK-NEXT: incl %eax
|
; CHECK-NEXT: incl %eax
|
||||||
; CHECK-NEXT: xorl %ecx, %ecx
|
; CHECK-NEXT: xorl %ecx, %ecx
|
||||||
; CHECK-NEXT: .p2align 4, 0x90
|
; CHECK-NEXT: movl %esi, %edx
|
||||||
; CHECK-NEXT: LBB1_6: ## %bb7.preheader
|
|
||||||
; CHECK-NEXT: ## =>This Loop Header: Depth=1
|
|
||||||
; CHECK-NEXT: ## Child Loop BB1_4 Depth 2
|
|
||||||
; CHECK-NEXT: xorl %esi, %esi
|
; CHECK-NEXT: xorl %esi, %esi
|
||||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
|
|
||||||
; CHECK-NEXT: .p2align 4, 0x90
|
; CHECK-NEXT: .p2align 4, 0x90
|
||||||
; CHECK-NEXT: LBB1_4: ## %bb6
|
; CHECK-NEXT: LBB1_4: ## %bb6
|
||||||
; CHECK-NEXT: ## Parent Loop BB1_6 Depth=1
|
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
|
||||||
; CHECK-NEXT: ## => This Inner Loop Header: Depth=2
|
|
||||||
; CHECK-NEXT: movzbl (%eax,%esi,2), %ebx
|
; CHECK-NEXT: movzbl (%eax,%esi,2), %ebx
|
||||||
; CHECK-NEXT: movb %bl, (%edx,%esi)
|
; CHECK-NEXT: movb %bl, (%edx,%esi)
|
||||||
; CHECK-NEXT: incl %esi
|
; CHECK-NEXT: incl %esi
|
||||||
; CHECK-NEXT: cmpl %edi, %esi
|
; CHECK-NEXT: cmpl %edi, %esi
|
||||||
; CHECK-NEXT: jb LBB1_4
|
; CHECK-NEXT: jb LBB1_4
|
||||||
; CHECK-NEXT: ## %bb.5: ## %bb9
|
; CHECK-NEXT: ## %bb.5: ## %bb9
|
||||||
; CHECK-NEXT: ## in Loop: Header=BB1_6 Depth=1
|
; CHECK-NEXT: ## in Loop: Header=BB1_4 Depth=1
|
||||||
|
; CHECK-NEXT: movl %edi, %ebx
|
||||||
; CHECK-NEXT: incl %ecx
|
; CHECK-NEXT: incl %ecx
|
||||||
; CHECK-NEXT: addl {{[0-9]+}}(%esp), %eax
|
; CHECK-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||||
; CHECK-NEXT: addl %edi, %edx
|
; CHECK-NEXT: addl %edi, %edx
|
||||||
; CHECK-NEXT: cmpl %ebp, %ecx
|
; CHECK-NEXT: cmpl %ebp, %ecx
|
||||||
; CHECK-NEXT: jne LBB1_6
|
; CHECK-NEXT: je LBB1_12
|
||||||
|
; CHECK-NEXT: ## %bb.6: ## %bb7.preheader
|
||||||
|
; CHECK-NEXT: ## in Loop: Header=BB1_4 Depth=1
|
||||||
|
; CHECK-NEXT: xorl %esi, %esi
|
||||||
|
; CHECK-NEXT: jmp LBB1_4
|
||||||
; CHECK-NEXT: LBB1_12: ## %bb18.loopexit
|
; CHECK-NEXT: LBB1_12: ## %bb18.loopexit
|
||||||
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
|
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
|
||||||
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
|
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
|
||||||
@ -501,10 +503,10 @@ define void @bar(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind {
|
|||||||
; CHECK-NEXT: cmpl $1, %ebp
|
; CHECK-NEXT: cmpl $1, %ebp
|
||||||
; CHECK-NEXT: jbe LBB1_13
|
; CHECK-NEXT: jbe LBB1_13
|
||||||
; CHECK-NEXT: ## %bb.7: ## %bb.nph5
|
; CHECK-NEXT: ## %bb.7: ## %bb.nph5
|
||||||
; CHECK-NEXT: cmpl $2, {{[0-9]+}}(%esp)
|
; CHECK-NEXT: cmpl $2, %edi
|
||||||
; CHECK-NEXT: jb LBB1_13
|
; CHECK-NEXT: jb LBB1_13
|
||||||
; CHECK-NEXT: ## %bb.8: ## %bb.nph5.split
|
; CHECK-NEXT: ## %bb.8: ## %bb.nph5.split
|
||||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
|
; CHECK-NEXT: movl %edi, %ebp
|
||||||
; CHECK-NEXT: shrl %ebp
|
; CHECK-NEXT: shrl %ebp
|
||||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; CHECK-NEXT: shrl %eax
|
; CHECK-NEXT: shrl %eax
|
||||||
@ -518,14 +520,14 @@ define void @bar(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind {
|
|||||||
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
|
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
|
||||||
; CHECK-NEXT: addl %edx, %eax
|
; CHECK-NEXT: addl %edx, %eax
|
||||||
; CHECK-NEXT: xorl %edx, %edx
|
; CHECK-NEXT: xorl %edx, %edx
|
||||||
; CHECK-NEXT: xorl %edi, %edi
|
; CHECK-NEXT: xorl %ebx, %ebx
|
||||||
; CHECK-NEXT: .p2align 4, 0x90
|
; CHECK-NEXT: .p2align 4, 0x90
|
||||||
; CHECK-NEXT: LBB1_9: ## %bb13
|
; CHECK-NEXT: LBB1_9: ## %bb13
|
||||||
; CHECK-NEXT: ## =>This Loop Header: Depth=1
|
; CHECK-NEXT: ## =>This Loop Header: Depth=1
|
||||||
; CHECK-NEXT: ## Child Loop BB1_10 Depth 2
|
; CHECK-NEXT: ## Child Loop BB1_10 Depth 2
|
||||||
; CHECK-NEXT: movl %edi, %ebx
|
; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
|
||||||
; CHECK-NEXT: andl $1, %ebx
|
; CHECK-NEXT: andl $1, %ebx
|
||||||
; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
|
; CHECK-NEXT: movl %edx, (%esp) ## 4-byte Spill
|
||||||
; CHECK-NEXT: addl %edx, %ebx
|
; CHECK-NEXT: addl %edx, %ebx
|
||||||
; CHECK-NEXT: imull {{[0-9]+}}(%esp), %ebx
|
; CHECK-NEXT: imull {{[0-9]+}}(%esp), %ebx
|
||||||
; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
|
; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
|
||||||
@ -543,26 +545,27 @@ define void @bar(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind {
|
|||||||
; CHECK-NEXT: jb LBB1_10
|
; CHECK-NEXT: jb LBB1_10
|
||||||
; CHECK-NEXT: ## %bb.11: ## %bb17
|
; CHECK-NEXT: ## %bb.11: ## %bb17
|
||||||
; CHECK-NEXT: ## in Loop: Header=BB1_9 Depth=1
|
; CHECK-NEXT: ## in Loop: Header=BB1_9 Depth=1
|
||||||
; CHECK-NEXT: incl %edi
|
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
|
||||||
|
; CHECK-NEXT: incl %ebx
|
||||||
; CHECK-NEXT: addl %ebp, %ecx
|
; CHECK-NEXT: addl %ebp, %ecx
|
||||||
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
|
; CHECK-NEXT: movl (%esp), %edx ## 4-byte Reload
|
||||||
; CHECK-NEXT: addl $2, %edx
|
; CHECK-NEXT: addl $2, %edx
|
||||||
; CHECK-NEXT: addl %ebp, %eax
|
; CHECK-NEXT: addl %ebp, %eax
|
||||||
; CHECK-NEXT: cmpl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
|
; CHECK-NEXT: cmpl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
|
||||||
; CHECK-NEXT: jb LBB1_9
|
; CHECK-NEXT: jb LBB1_9
|
||||||
; CHECK-NEXT: LBB1_13: ## %bb20
|
; CHECK-NEXT: LBB1_13: ## %bb20
|
||||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
|
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||||
; CHECK-NEXT: cmpl $1, %edx
|
; CHECK-NEXT: cmpl $1, %esi
|
||||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
|
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
|
||||||
|
; CHECK-NEXT: movl %edi, %eax
|
||||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
|
|
||||||
; CHECK-NEXT: je LBB1_19
|
; CHECK-NEXT: je LBB1_19
|
||||||
; CHECK-NEXT: ## %bb.14: ## %bb20
|
; CHECK-NEXT: ## %bb.14: ## %bb20
|
||||||
; CHECK-NEXT: cmpl $3, %edx
|
; CHECK-NEXT: cmpl $3, %esi
|
||||||
; CHECK-NEXT: jne LBB1_24
|
; CHECK-NEXT: jne LBB1_24
|
||||||
; CHECK-NEXT: ## %bb.15: ## %bb22
|
; CHECK-NEXT: ## %bb.15: ## %bb22
|
||||||
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
|
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
|
||||||
; CHECK-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
|
; CHECK-NEXT: addl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
|
||||||
; CHECK-NEXT: testl %ebp, %ebp
|
; CHECK-NEXT: testl %ebp, %ebp
|
||||||
; CHECK-NEXT: je LBB1_18
|
; CHECK-NEXT: je LBB1_18
|
||||||
; CHECK-NEXT: ## %bb.16: ## %bb.nph
|
; CHECK-NEXT: ## %bb.16: ## %bb.nph
|
||||||
@ -570,9 +573,11 @@ define void @bar(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind {
|
|||||||
; CHECK-NEXT: leal 15(%ebp), %eax
|
; CHECK-NEXT: leal 15(%ebp), %eax
|
||||||
; CHECK-NEXT: andl $-16, %eax
|
; CHECK-NEXT: andl $-16, %eax
|
||||||
; CHECK-NEXT: imull {{[0-9]+}}(%esp), %eax
|
; CHECK-NEXT: imull {{[0-9]+}}(%esp), %eax
|
||||||
; CHECK-NEXT: leal 15(%ecx), %ebx
|
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||||
; CHECK-NEXT: andl $-16, %ebx
|
; CHECK-NEXT: addl $15, %edx
|
||||||
; CHECK-NEXT: addl %eax, %edi
|
; CHECK-NEXT: andl $-16, %edx
|
||||||
|
; CHECK-NEXT: movl %edx, (%esp) ## 4-byte Spill
|
||||||
|
; CHECK-NEXT: addl %eax, %ecx
|
||||||
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
|
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
|
||||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
|
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||||
; CHECK-NEXT: leal (%edx,%eax), %ebp
|
; CHECK-NEXT: leal (%edx,%eax), %ebp
|
||||||
@ -580,14 +585,16 @@ define void @bar(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind {
|
|||||||
; CHECK-NEXT: LBB1_17: ## %bb23
|
; CHECK-NEXT: LBB1_17: ## %bb23
|
||||||
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
|
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
|
||||||
; CHECK-NEXT: subl $4, %esp
|
; CHECK-NEXT: subl $4, %esp
|
||||||
|
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||||
|
; CHECK-NEXT: pushl %ebx
|
||||||
; CHECK-NEXT: pushl %ecx
|
; CHECK-NEXT: pushl %ecx
|
||||||
; CHECK-NEXT: pushl %edi
|
|
||||||
; CHECK-NEXT: pushl %ebp
|
; CHECK-NEXT: pushl %ebp
|
||||||
|
; CHECK-NEXT: movl %ecx, %edi
|
||||||
; CHECK-NEXT: calll _memcpy
|
; CHECK-NEXT: calll _memcpy
|
||||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
; CHECK-NEXT: movl %edi, %ecx
|
||||||
; CHECK-NEXT: addl $16, %esp
|
; CHECK-NEXT: addl $16, %esp
|
||||||
; CHECK-NEXT: addl %ecx, %ebp
|
; CHECK-NEXT: addl %ebx, %ebp
|
||||||
; CHECK-NEXT: addl %ebx, %edi
|
; CHECK-NEXT: addl (%esp), %ecx ## 4-byte Folded Reload
|
||||||
; CHECK-NEXT: decl %esi
|
; CHECK-NEXT: decl %esi
|
||||||
; CHECK-NEXT: jne LBB1_17
|
; CHECK-NEXT: jne LBB1_17
|
||||||
; CHECK-NEXT: LBB1_18: ## %bb26
|
; CHECK-NEXT: LBB1_18: ## %bb26
|
||||||
@ -607,21 +614,24 @@ define void @bar(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind {
|
|||||||
; CHECK-NEXT: je LBB1_22
|
; CHECK-NEXT: je LBB1_22
|
||||||
; CHECK-NEXT: ## %bb.20: ## %bb.nph11
|
; CHECK-NEXT: ## %bb.20: ## %bb.nph11
|
||||||
; CHECK-NEXT: movl %ebp, %esi
|
; CHECK-NEXT: movl %ebp, %esi
|
||||||
; CHECK-NEXT: leal 15(%ecx), %ebx
|
; CHECK-NEXT: movl %eax, %edi
|
||||||
; CHECK-NEXT: andl $-16, %ebx
|
; CHECK-NEXT: addl $15, %eax
|
||||||
|
; CHECK-NEXT: andl $-16, %eax
|
||||||
|
; CHECK-NEXT: movl %eax, (%esp) ## 4-byte Spill
|
||||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
|
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
|
||||||
; CHECK-NEXT: .p2align 4, 0x90
|
; CHECK-NEXT: .p2align 4, 0x90
|
||||||
; CHECK-NEXT: LBB1_21: ## %bb30
|
; CHECK-NEXT: LBB1_21: ## %bb30
|
||||||
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
|
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
|
||||||
; CHECK-NEXT: subl $4, %esp
|
; CHECK-NEXT: subl $4, %esp
|
||||||
; CHECK-NEXT: pushl %ecx
|
|
||||||
; CHECK-NEXT: pushl %edi
|
; CHECK-NEXT: pushl %edi
|
||||||
|
; CHECK-NEXT: pushl %ecx
|
||||||
; CHECK-NEXT: pushl %ebp
|
; CHECK-NEXT: pushl %ebp
|
||||||
|
; CHECK-NEXT: movl %ecx, %ebx
|
||||||
; CHECK-NEXT: calll _memcpy
|
; CHECK-NEXT: calll _memcpy
|
||||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
; CHECK-NEXT: movl %ebx, %ecx
|
||||||
; CHECK-NEXT: addl $16, %esp
|
; CHECK-NEXT: addl $16, %esp
|
||||||
; CHECK-NEXT: addl %ecx, %ebp
|
; CHECK-NEXT: addl %edi, %ebp
|
||||||
; CHECK-NEXT: addl %ebx, %edi
|
; CHECK-NEXT: addl (%esp), %ecx ## 4-byte Folded Reload
|
||||||
; CHECK-NEXT: decl %esi
|
; CHECK-NEXT: decl %esi
|
||||||
; CHECK-NEXT: jne LBB1_21
|
; CHECK-NEXT: jne LBB1_21
|
||||||
; CHECK-NEXT: LBB1_22: ## %bb33
|
; CHECK-NEXT: LBB1_22: ## %bb33
|
||||||
|
Loading…
Reference in New Issue
Block a user