mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
a833e60074
Relative to the previous implementation, this always uses aliasesUnknownInst() instead of aliasesPointer() to correctly handle atomics. The added test case was previously miscompiled. ----- Even when MemorySSA-based LICM is used, an AST is still populated for scalar promotion. As the AST has quadratic complexity, a lot of time is spent in this step despite the existing access count limit. This patch optimizes the identification of promotable stores. The idea here is pretty simple: We're only interested in must-alias mod sets of loop invariant pointers. As such, only populate the AST with loop-invariant loads and stores (anything else is definitely not promotable) and then discard any sets which alias with any of the remaining, definitely non-promotable accesses. If we promoted something, check whether this has made some other accesses loop invariant and thus possible promotion candidates. This is much faster in practice, because we need to perform AA queries for O(NumPromotable^2 + NumPromotable*NumNonPromotable) instead of O(NumTotal^2), and NumPromotable tends to be small. Additionally, promotable accesses have loop invariant pointers, for which AA is cheaper. This has a significant positive compile-time impact. We save ~1.8% geomean on CTMark at O3, with 6% on lencod in particular and 25% on individual files. Conceptually, this change is NFC, but may not be so in practice, because the AST is only an approximation, and can produce different results depending on the order in which accesses are added. However, there is at least no impact on the number of promotions (licm.NumPromoted) in test-suite O3 configuration with this change. Differential Revision: https://reviews.llvm.org/D89264
35 lines
1.4 KiB
LLVM
35 lines
1.4 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt -S -licm < %s | FileCheck %s
|
|
|
|
; Struct with an i64 field (index 0) and a pointer to another
; %class.LiveThread (index 1); @test addresses the index-1 field.
%class.LiveThread = type { i64, %class.LiveThread* }
|
|
|
|
; Externally defined 8-byte-aligned i64 global; it is the memory operand of
; the cmpxchg in @test.
@globallive = external dso_local global i64, align 8
|
|
|
|
; The store should not be sunk (via scalar promotion) past the cmpxchg.
|
|
|
|
; @test: a single-block loop that stores through a pointer computed before the
; loop and then performs a release cmpxchg on @globallive, repeating until the
; i1 element of the cmpxchg result is true. The autogenerated assertions below
; expect the store to remain inside the loop, ahead of the cmpxchg.
define void @test(%class.LiveThread* %live_thread) {
|
|
; CHECK-LABEL: @test(
|
|
; CHECK-NEXT: [[NEXT_UNPROCESSED_:%.*]] = getelementptr inbounds [[CLASS_LIVETHREAD:%.*]], %class.LiveThread* [[LIVE_THREAD:%.*]], i64 0, i32 1
|
|
; CHECK-NEXT: br label [[LOOP:%.*]]
|
|
; CHECK: loop:
|
|
; CHECK-NEXT: store %class.LiveThread* undef, %class.LiveThread** [[NEXT_UNPROCESSED_]], align 8
|
|
; CHECK-NEXT: [[XCHG:%.*]] = cmpxchg weak i64* @globallive, i64 undef, i64 undef release monotonic, align 8
|
|
; CHECK-NEXT: [[DONE:%.*]] = extractvalue { i64, i1 } [[XCHG]], 1
|
|
; CHECK-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]]
|
|
; CHECK: exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
; Address of field 1 (the %class.LiveThread* member) of %live_thread. It is
; computed before entering the loop, so the pointer is the same on every
; iteration.
%next_unprocessed_ = getelementptr inbounds %class.LiveThread, %class.LiveThread* %live_thread, i64 0, i32 1
|
|
br label %loop
|
|
|
|
loop:
|
|
; Non-atomic store through the pre-computed pointer; this is the store the
; test expects LICM not to sink out of the loop.
store %class.LiveThread* undef, %class.LiveThread** %next_unprocessed_, align 8
|
|
; Weak compare-exchange on @globallive with release ordering on success and
; monotonic ordering on failure.
%xchg = cmpxchg weak i64* @globallive, i64 undef, i64 undef release monotonic, align 8
|
|
; Element 1 of the { i64, i1 } result is the i1 used as the loop exit condition.
%done = extractvalue { i64, i1 } %xchg, 1
|
|
; Leave the loop when %done is true; otherwise run another iteration.
br i1 %done, label %exit, label %loop
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
|