1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00
llvm-mirror/test/Transforms/PGOProfile/comdat_internal.ll
Fangrui Song 715c06d0a3 [PGO] Move __profc_ and __profvp_ from their own comdat groups to __profd_'s comdat group
D68041 placed `__profc_`,  `__profd_` and (if exists) `__profvp_` in different comdat groups.
There are some issues:

* Cost: one or two additional section headers (`.group` section(s)): 64 or 128 bytes on ELF64.
* `__profc_`,  `__profd_` and (if exists) `__profvp_` should be retained or
  discarded. Placing them into separate comdat groups is conceptually inferior.
* If the prevailing group does not include `__profvp_` (value profiling not
  used) but a non-prevailing group from another translation unit has `__profvp_`
  (the function is inlined into another and triggers value profiling), there
  will be a stray `__profvp_` if --gc-sections is not enabled.
  This has been fixed by 3d6f53018f845e893ad34f64ff2851a2e5c3ba1d.

Actually, we can reuse an existing symbol (we choose `__profd_`) as the group
signature to avoid a string in the string table (the sole reason that D68041
could improve code size is that `__profv_` was an otherwise unused symbol which
wasted string table space). This saves one or two section headers.

For a -DCMAKE_BUILD_TYPE=Release -DLLVM_BUILD_INSTRUMENTED=IR build, `ninja
clang lld`, the patch has saved 10.5MiB (2.2%) for the total .o size.

Reviewed By: davidxl

Differential Revision: https://reviews.llvm.org/D84723
2020-08-03 20:35:50 -07:00

27 lines
1.2 KiB
LLVM

; RUN: opt < %s -pgo-instr-gen -instrprof -S | FileCheck %s
; RUN: opt < %s -passes=pgo-instr-gen,instrprof -S | FileCheck %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
$foo = comdat any
; CHECK: $foo = comdat any
; CHECK: $__llvm_profile_raw_version = comdat any
; CHECK: $__profd__stdin__foo.[[FOO_HASH:[0-9]+]] = comdat any
@bar = global i32 ()* @foo, align 8
; CHECK: @__llvm_profile_raw_version = constant i64 {{[0-9]+}}, comdat
; CHECK-NOT: __profn__stdin__foo
; CHECK: @__profc__stdin__foo.[[FOO_HASH]] = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat($__profd__stdin__foo.[[FOO_HASH]]), align 8
; CHECK: @__profd__stdin__foo.[[FOO_HASH]] = private global { i64, i64, i64*, i8*, i8*, i32, [2 x i16] } { i64 -5640069336071256030, i64 [[FOO_HASH]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__stdin__foo.[[FOO_HASH]], i32 0, i32 0), i8* null
; CHECK-NOT: bitcast (i32 ()* @foo to i8*)
; CHECK-SAME: , i8* null, i32 1, [2 x i16] zeroinitializer }, section "__llvm_prf_data", comdat, align 8
; CHECK: @__llvm_prf_nm
; CHECK: @llvm.used
define internal i32 @foo() comdat {
entry:
ret i32 1
}