mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
c7934c2798
D65060 was reverted because it introduced non-determinism by using BFI counts from already freed blocks. The parent of this revision fixes that by using a VH callback on blocks to prevent this from happening and makes sure BFI data is passed correctly in LoopStandardAnalysisResults. This re-introduces the previous optimization of using BFI data to prevent LICM from hoisting/sinking if the instruction will end up moving to a colder block. Internally at Facebook, this change results in a ~7% win in a CPU-related metric in one of our big services by preventing hoisting of cold code into a hot pre-header, as the added test case demonstrates. Testing: ninja check. Reviewed By: asbirlea. Differential Revision: https://reviews.llvm.org/D87551
89 lines
3.4 KiB
LLVM
89 lines
3.4 KiB
LLVM
; RUN: opt -enable-new-pm=1 -sample-profile -licm -S -sample-profile-file='%S/Inputs/no-hoist-prof.prof' < %s | FileCheck %s --check-prefix=CHECK-BFI-LICM
|
|
; RUN: opt -passes=licm -S < %s | FileCheck %s --check-prefix=CHECK-LICM
|
|
|
|
; Original source code:
|
|
;
|
|
; int bar(int);
|
|
; int foo(int iter, int explode) {
|
|
; int base = bar(explode);
|
|
; for (int i = 0; i != iter; ++i)
|
|
; if (i == explode)
|
|
; iter = (base * base) + bar(iter);
|
|
; return iter;
|
|
; }
|
|
|
|
; We need debug information in this .ll in order to leverage the pgo file, so:
|
|
; .ll generated by running `clang++ -O3 -g -S -emit-llvm`, then:
|
|
; - move hoisted mul back into cold section
|
|
; - give labels names
|
|
; - reindex variables
|
|
; - remove metadata calls, attributes, module header
|
|
; - remove unnecessary metadata
|
|
|
|
; CHECK-LICM: .l.check.preheader:{{.*}}
|
|
; CHECK-LICM-NEXT: {{.*}} = mul {{.*}}
|
|
; CHECK-LICM-NEXT: br{{.*}}
|
|
|
|
; CHECK-BFI-LICM: .l.cold:{{.*}}
|
|
; CHECK-BFI-LICM-NEXT: {{.*}} = mul {{.*}}
|
|
|
|
; IR for foo(iter, explode) from the "Original source code" comment in this
; file: %0 is `iter`, %1 is `explode`.
;
; The instruction under test is `%9 = mul nsw i32 %3, %3` in .l.cold. Both of
; its operands are defined before the loop, so it is loop-invariant. The
; CHECK-LICM prefix expects it to appear in .l.check.preheader, while the
; CHECK-BFI-LICM prefix expects it to remain in .l.cold.
define dso_local i32 @_Z3fooii(i32, i32) local_unnamed_addr #0 !dbg !7 {
|
|
; Entry block (referenced as %2 by the phi in .l.ret): %3 is `base = bar(explode)`.
%3 = tail call i32 @_Z3bari(i32 %1), !dbg !19
|
|
; Skip the loop entirely when iter == 0.
%4 = icmp eq i32 %0, 0, !dbg !22
|
|
br i1 %4, label %.l.ret, label %.l.check.preheader, !dbg !24
|
|
|
|
; Loop preheader: contains only the branch into the loop in this input.
.l.check.preheader:
|
|
br label %.l.check, !dbg !24
|
|
|
|
; Function exit: returns 0 when the loop was skipped, otherwise the final %12.
.l.ret:
|
|
%5 = phi i32 [ 0, %2 ], [ %12, %.l.iterate ]
|
|
ret i32 %5, !dbg !25
|
|
|
|
; Loop header: %6 is the induction variable `i`, %7 is the current `iter`.
.l.check:
|
|
%6 = phi i32 [ 0, %.l.check.preheader ], [ %13, %.l.iterate ]
|
|
%7 = phi i32 [ %0, %.l.check.preheader ], [ %12, %.l.iterate ]
|
|
; Only the `i == explode` iteration takes the .l.cold path.
%8 = icmp eq i32 %6, %1, !dbg !26
|
|
br i1 %8, label %.l.cold, label %.l.iterate, !dbg !28
|
|
|
|
; Conditional body: iter = (base * base) + bar(iter).
.l.cold:
|
|
; Loop-invariant multiply that the CHECK lines look for.
%9 = mul nsw i32 %3, %3
|
|
%10 = tail call i32 @_Z3bari(i32 %7), !dbg !29
|
|
%11 = add nsw i32 %10, %9, !dbg !30
|
|
br label %.l.iterate, !dbg !31
|
|
|
|
; Loop latch: merge the (possibly updated) `iter`, increment `i`, test for exit.
.l.iterate:
|
|
%12 = phi i32 [ %11, %.l.cold ], [ %7, %.l.check ]
|
|
%13 = add nuw nsw i32 %6, 1, !dbg !32
|
|
; Exit once i == iter; otherwise branch back to the loop header.
%14 = icmp eq i32 %13, %12, !dbg !22
|
|
br i1 %14, label %.l.ret, label %.l.check, !dbg !24, !llvm.loop !33
|
|
}
|
|
|
|
; Attribute group #0 is the one attached to @_Z3fooii above.
attributes #0 = { "use-sample-profile" }
|
|
|
|
; External declaration of bar(int), called from @_Z3fooii.
; NOTE(review): attribute group #1 is referenced here but is not defined in the
; visible part of this file -- presumably dropped by the "remove ... attributes"
; cleanup step mentioned in the header comment; confirm this is intentional.
declare dso_local i32 @_Z3bari(i32) local_unnamed_addr #1
|
|
|
|
; Debug-info metadata referenced by the !dbg attachments in @_Z3fooii.
; Node numbering is sparse (e.g. !2, !3, !5 and !6 are absent); the header
; comment of this file notes that unnecessary metadata was removed.
!llvm.module.flags = !{!4}
|
|
|
|
; Compile unit and source file descriptors.
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 8.0.20181009 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, nameTableKind: None)
|
|
!1 = !DIFile(filename: "foo.cpp", directory: "/tmp/gather_pgo")
|
|
; The only module flag: "Debug Info Version" = 3.
!4 = !{i32 2, !"Debug Info Version", i32 3}
|
|
; Subprogram for foo (linkage name _Z3fooii); attached to the define via !dbg !7.
!7 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooii", scope: !1, file: !1, line: 2, type: !8, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0)
|
|
!8 = !DISubroutineType(types: !9)
|
|
; Three entries, all the `int` basic type !10.
!9 = !{!10, !10, !10}
|
|
!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
|
|
; Lexical scopes and source locations (lines 3-7 of foo.cpp) used by the
; instructions in @_Z3fooii.
!16 = distinct !DILexicalBlock(scope: !7, file: !1, line: 4, column: 3)
|
|
!19 = !DILocation(line: 3, column: 14, scope: !7)
|
|
!22 = !DILocation(line: 4, column: 21, scope: !23)
|
|
!23 = distinct !DILexicalBlock(scope: !16, file: !1, line: 4, column: 3)
|
|
!24 = !DILocation(line: 4, column: 3, scope: !16)
|
|
!25 = !DILocation(line: 7, column: 3, scope: !7)
|
|
!26 = !DILocation(line: 5, column: 11, scope: !27)
|
|
!27 = distinct !DILexicalBlock(scope: !23, file: !1, line: 5, column: 9)
|
|
!28 = !DILocation(line: 5, column: 9, scope: !23)
|
|
!29 = !DILocation(line: 6, column: 30, scope: !27)
|
|
!30 = !DILocation(line: 6, column: 28, scope: !27)
|
|
!31 = !DILocation(line: 6, column: 7, scope: !27)
|
|
!32 = !DILocation(line: 4, column: 30, scope: !23)
|
|
; Loop ID attached to the back-edge branch via !llvm.loop !33; it references
; the locations !24 and !34.
!33 = distinct !{!33, !24, !34}
|
|
!34 = !DILocation(line: 6, column: 38, scope: !16)
|