mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
85bd37fc58
Global to local demotion can speed up programs that use globals a lot. It is particularly useful with LTO, when the entire call graph is known and most functions have been internalized. For a global to be demoted, it must only be accessed by one function and that function: 1. Must never recurse directly or indirectly, else the GV would be clobbered. 2. Must never rely on the value in GV at the start of the function (apart from the initializer). GlobalOpt can already do this, but it is hamstrung and only ever tries to demote globals inside "main", because C++ gives extra guarantees about how main is called - once and only once. In LTO mode, we can often prove the first property (if the function is internal by this point, we know enough about the callgraph to determine if it could possibly recurse). FunctionAttrs now infers the "norecurse" attribute for this reason. The second property can be proven for a subset of functions by proving that all loads from GV are dominated by a store to GV. This is conservative in the name of compile time - this only requires a DominatorTree which is fairly cheap in the grand scheme of things. We could do more fancy stuff with MemoryDependenceAnalysis too to catch more cases but this appears to catch most of the useful ones in my testing. llvm-svn: 253168
81 lines
1.8 KiB
LLVM
81 lines
1.8 KiB
LLVM
; RUN: opt -globalopt -S < %s | FileCheck %s

; Each global below is internal, initialized to 5, and accessed by exactly one
; of the test functions: @G1 by @test1, @G2 by @test2, and so on up to @G5.
@G1 = internal global i32 5
@G2 = internal global i32 5
@G3 = internal global i32 5
@G4 = internal global i32 5
@G5 = internal global i32 5

; The store to @G1 precedes the only load in this single-block function, so the
; load never observes the value @G1 held on entry. The global is expected to be
; demoted: no reference to @G1 may remain before the return.
; CHECK-LABEL: @test1
define internal i32 @test1() norecurse {
; CHECK-NOT: @G1
  store i32 4, i32* @G1
  %a = load i32, i32* @G1
; CHECK: ret
  ret i32 %a
}

; The load comes before the store which makes @G2 live before the call, so the
; global must not be demoted.
; NOTE(review): the negative check below looks for a local value named %G2,
; presumably the name a demoted copy of the global would receive - confirm
; against the GlobalOpt implementation.
; CHECK-LABEL: @test2
define internal i32 @test2() norecurse {
; CHECK-NOT: %G2
  %a = load i32, i32* @G2
  store i32 4, i32* @G2
; CHECK: ret
  ret i32 %a
}

; This global is indexed by a GEP - this makes it partial alias and we bail out.
; FIXME: We don't actually have to bail out in this case.
; The load goes through the GEP result %x (index 0 into @G3) and, as in @test2,
; it also comes before the store.

; CHECK-LABEL: @test3
define internal i32 @test3() norecurse {
; CHECK-NOT: %G3
  %x = getelementptr i32,i32* @G3, i32 0
  %a = load i32, i32* %x
  store i32 4, i32* @G3
; CHECK: ret
  ret i32 %a
}

; The global is cast to a larger type and then loaded: the store writes an i32
; but the load reads an i64 through the bitcast pointer. The store only
; partially covers the load, so we must not demote.

; CHECK-LABEL: @test4
define internal i32 @test4() norecurse {
; CHECK-NOT: %G4
  store i32 4, i32* @G4
  %x = bitcast i32* @G4 to i64*
  %a = load i64, i64* %x
  %b = trunc i64 %a to i32
; CHECK: ret
  ret i32 %b
}

; The global is cast to a smaller type and then loaded: the store writes an i32
; and the load reads only an i16 through the bitcast pointer, so the store
; covers the whole load. This one is fine, and no reference to @G5 may remain
; before the return.

; CHECK-LABEL: @test5
define internal i32 @test5() norecurse {
; CHECK-NOT: @G5
  store i32 4, i32* @G5
  %x = bitcast i32* @G5 to i16*
  %a = load i16, i16* %x
  %b = zext i16 %a to i32
; CHECK: ret
  ret i32 %b
}

; Driver: calls each internal test function once and ORs the five results into
; the return value, so every test function has a caller and every result is
; used (presumably so that none of them is discarded as dead - confirm).
define i32 @main() norecurse {
  %a = call i32 @test1()
  %b = call i32 @test2()
  %c = call i32 @test3()
  %d = call i32 @test4()
  %e = call i32 @test5()

  %x = or i32 %a, %b
  %y = or i32 %x, %c
  %z = or i32 %y, %d
  %w = or i32 %z, %e
  ret i32 %w
}