mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 20:23:11 +01:00
[InstrProfiling][ELF] Make __profd_ private if the function does not use value profiling
On ELF, the D103372 optimization can apply to more cases. There are two prerequisites for making `__profd_` private:

* `__profc_` keeps `__profd_` live under compiler/linker GC
* `__profd_` is not referenced by code

The first is satisfied because all counters/data are in a section group (either `comdat any` or `comdat noduplicates`). The second requires that the function does not use value profiling.

Regarding the second point: `__profd_` may be referenced by other text sections due to inlining. There will be a linker error if a prevailing text section references the non-prevailing local symbol.

With this change, a stage 2 (`-DLLVM_TARGETS_TO_BUILD=X86 -DLLVM_BUILD_INSTRUMENTED=IR`) clang is 4.2% smaller (1-169620032/177066968). The output of `stat -c %s **/*.o | awk '{s+=$1}END{print s}'` is 2.5% smaller.

Reviewed By: davidxl, rnk

Differential Revision: https://reviews.llvm.org/D103717
This commit is contained in:
parent
6c04830c65
commit
e71d1a723a
@ -887,25 +887,22 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
|
||||
// Allocate statically the array of pointers to value profile nodes for
|
||||
// the current function.
|
||||
Constant *ValuesPtrExpr = ConstantPointerNull::get(Int8PtrTy);
|
||||
if (ValueProfileStaticAlloc && !needsRuntimeRegistrationOfSectionRange(TT)) {
|
||||
uint64_t NS = 0;
|
||||
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
|
||||
NS += PD.NumValueSites[Kind];
|
||||
if (NS) {
|
||||
ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS);
|
||||
|
||||
auto *ValuesVar =
|
||||
new GlobalVariable(*M, ValuesTy, false, Linkage,
|
||||
Constant::getNullValue(ValuesTy),
|
||||
getVarName(Inc, getInstrProfValuesVarPrefix()));
|
||||
ValuesVar->setVisibility(Visibility);
|
||||
ValuesVar->setSection(
|
||||
getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
|
||||
ValuesVar->setAlignment(Align(8));
|
||||
MaybeSetComdat(ValuesVar);
|
||||
ValuesPtrExpr =
|
||||
ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx));
|
||||
}
|
||||
uint64_t NS = 0;
|
||||
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
|
||||
NS += PD.NumValueSites[Kind];
|
||||
if (NS > 0 && ValueProfileStaticAlloc &&
|
||||
!needsRuntimeRegistrationOfSectionRange(TT)) {
|
||||
ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS);
|
||||
auto *ValuesVar = new GlobalVariable(
|
||||
*M, ValuesTy, false, Linkage, Constant::getNullValue(ValuesTy),
|
||||
getVarName(Inc, getInstrProfValuesVarPrefix()));
|
||||
ValuesVar->setVisibility(Visibility);
|
||||
ValuesVar->setSection(
|
||||
getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
|
||||
ValuesVar->setAlignment(Align(8));
|
||||
MaybeSetComdat(ValuesVar);
|
||||
ValuesPtrExpr =
|
||||
ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx));
|
||||
}
|
||||
|
||||
// Create data variable.
|
||||
@ -929,10 +926,15 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
|
||||
#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
|
||||
#include "llvm/ProfileData/InstrProfData.inc"
|
||||
};
|
||||
// If code never references data variables (the symbol is unneeded), and
|
||||
// linker GC cannot discard data variables while the text section is retained,
|
||||
// data variables can be private. This optimization applies on COFF and ELF.
|
||||
if (!DataReferencedByCode && !TT.isOSBinFormatMachO()) {
|
||||
// If the data variable is not referenced by code (if we don't emit
|
||||
// @llvm.instrprof.value.profile, NS will be 0), and the counter keeps the
|
||||
// data variable live under linker GC, the data variable can be private. This
|
||||
// optimization applies to ELF.
|
||||
//
|
||||
// On COFF, a comdat leader cannot be local so we require DataReferencedByCode
|
||||
// to be false.
|
||||
if (NS == 0 && (TT.isOSBinFormatELF() ||
|
||||
(!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
|
||||
Linkage = GlobalValue::PrivateLinkage;
|
||||
Visibility = GlobalValue::DefaultVisibility;
|
||||
}
|
||||
|
@ -1,22 +1,58 @@
|
||||
; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
|
||||
; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
|
||||
; RUN: opt < %s -passes=pgo-instr-gen,instrprof -S | FileCheck %s --check-prefix=LOWER
|
||||
; RUN: opt < %s -passes=pgo-instr-gen,instrprof -vp-static-alloc=true -S | FileCheck %s --check-prefix=LOWER
|
||||
; RUN: opt < %s -passes=pgo-instr-gen,instrprof -vp-static-alloc=false -S | FileCheck %s --check-prefix=LOWER
|
||||
|
||||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
$foo3 = comdat any
|
||||
$novp_inline = comdat any
|
||||
$vp_inline = comdat any
|
||||
|
||||
@bar = external global void ()*, align 8
|
||||
|
||||
; GEN: @__profn_novp_inline = linkonce_odr hidden constant [11 x i8] c"novp_inline"
|
||||
; GEN: @__profn_foo = private constant [3 x i8] c"foo"
|
||||
; GEN: @__profn_vp_inline = linkonce_odr hidden constant [9 x i8] c"vp_inline"
|
||||
|
||||
;; Test that a linkonce function's address is recorded.
|
||||
;; We allow a linkonce profd to be private if the function does not use value profiling.
|
||||
; LOWER: @__profd_novp_inline.[[HASH:[0-9]+]] = private global {{.*}} @__profc_novp_inline.[[HASH]]
|
||||
; LOWER-SAME: i8* bitcast (void ()* @novp_inline to i8*)
|
||||
; LOWER: @__profd_foo = private {{.*}} @__profc_foo
|
||||
|
||||
;; __profd_vp_inline.[[#]] is referenced by code and may be referenced by other
|
||||
;; text sections due to inlining. It can't be local because a linker error would
|
||||
;; occur if a prevailing text section references the non-prevailing local symbol.
|
||||
; LOWER: @__profd_vp_inline.[[FOO_HASH:[0-9]+]] = linkonce_odr hidden {{.*}} @__profc_vp_inline.[[FOO_HASH]]
|
||||
; LOWER-SAME: i8* bitcast (void ()* @vp_inline to i8*)
|
||||
|
||||
define linkonce_odr void @novp_inline() comdat {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @foo() {
|
||||
entry:
|
||||
; GEN: @foo()
|
||||
; GEN: entry:
|
||||
; GEN-NEXT: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 [[FOO_HASH:[0-9]+]], i32 1, i32 0)
|
||||
; GEN-NEXT: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 [[#FOO_HASH:]], i32 1, i32 0)
|
||||
%tmp = load void ()*, void ()** @bar, align 8
|
||||
; GEN: [[ICALL_TARGET:%[0-9]+]] = ptrtoint void ()* %tmp to i64
|
||||
; GEN-NEXT: call void @llvm.instrprof.value.profile(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 [[FOO_HASH]], i64 [[ICALL_TARGET]], i32 0, i32 0)
|
||||
; GEN-NEXT: call void @llvm.instrprof.value.profile(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 [[#FOO_HASH]], i64 [[ICALL_TARGET]], i32 0, i32 0)
|
||||
; LOWER: call void @__llvm_profile_instrument_target(i64 %1, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 0)
|
||||
call void %tmp()
|
||||
ret void
|
||||
}
|
||||
|
||||
define linkonce_odr void @vp_inline() comdat {
|
||||
entry:
|
||||
; GEN: @vp_inline()
|
||||
; GEN: entry:
|
||||
; GEN-NEXT: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @__profn_vp_inline, i32 0, i32 0), i64 [[#FOO_HASH:]], i32 1, i32 0)
|
||||
%tmp = load void ()*, void ()** @bar, align 8
|
||||
; GEN: [[ICALL_TARGET:%[0-9]+]] = ptrtoint void ()* %tmp to i64
|
||||
; GEN-NEXT: call void @llvm.instrprof.value.profile(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @__profn_vp_inline, i32 0, i32 0), i64 [[#FOO_HASH]], i64 [[ICALL_TARGET]], i32 0, i32 0)
|
||||
; LOWER: call void @__llvm_profile_instrument_target(i64 %1, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_vp_inline.[[#]] to i8*), i32 0)
|
||||
call void %tmp()
|
||||
ret void
|
||||
}
|
||||
@ -53,13 +89,6 @@ bb11: ; preds = %bb2
|
||||
resume { i8*, i32 } %tmp3
|
||||
}
|
||||
|
||||
; Test that comdat function's address is recorded.
|
||||
; LOWER: @__profd_foo3.[[FOO3_HASH:[0-9]+]] = linkonce_odr{{.*}}@__profc_foo3.[[FOO3_HASH]]
|
||||
; Function Attrs: nounwind uwtable
|
||||
define linkonce_odr i32 @foo3() comdat {
|
||||
ret i32 1
|
||||
}
|
||||
|
||||
declare i32 @__gxx_personality_v0(...)
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
|
Loading…
Reference in New Issue
Block a user