1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 20:23:11 +01:00
llvm-mirror/test/Transforms/FunctionSpecialization/function-specialization2.ll
Sjoerd Meijer 6a49dbd1a3 Function Specialization Pass
This adds a function specialization pass to LLVM. Constant parameters
like function pointers and constant globals are propagated to the callee by
specializing the function.

This is a first version with a number of limitations:
- The pass is off by default, so needs to be enabled on the command line,
- It does not handle specialization of recursive functions,
- It does not yet handle constants and constant ranges,
- Only 1 argument per function is specialised,
- The cost-model needs further investigation and, perhaps related to that,
- We are not yet caching analysis results.

This is based on earlier work by Matthew Simpson (D36432) and Vinay Madhusudan.
More recently this was also discussed on the list, see:

https://lists.llvm.org/pipermail/llvm-dev/2021-March/149380.html.

The motivation for this work is that function specialisation often comes up as
a reason for performance differences of generated code between LLVM and GCC,
which has this enabled by default from optimisation level -O3 and up. And while
this certainly helps a few CPU benchmark cases, it also triggers in real-world
code and is thus a generally useful transformation to have in LLVM.

Function specialisation has great potential to increase compile-times and
code-size.  The summary from some investigations with this patch is:
- Compile-time increases for short compile jobs are relatively high, but the
  increase in absolute numbers is still low.
- For longer compile-jobs, the extra compile time is around 1%, and very much
  in line with GCC.
- It is difficult to blame one thing for compile-time increases: it looks like
  everywhere a little bit more time is spent processing more functions and
  instructions.
- But the function specialisation pass itself is not very expensive; it doesn't
  show up very high in the profile of the optimisation passes.

The goal of this work is to reach parity with GCC, which means that eventually
we would like to get this enabled by default. Before that, we would like to
address some of the limitations listed above.

Differential Revision: https://reviews.llvm.org/D93838
2021-06-11 09:11:29 +01:00

88 lines
3.4 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -function-specialization -deadargelim -S < %s | FileCheck %s
; RUN: opt -function-specialization -func-specialization-max-iters=1 -deadargelim -S < %s | FileCheck %s
; RUN: opt -function-specialization -func-specialization-max-iters=0 -deadargelim -S < %s | FileCheck %s --check-prefix=DISABLED
; RUN: opt -function-specialization -func-specialization-avg-iters-cost=1 -deadargelim -S < %s | FileCheck %s
; DISABLED-NOT: @func.1(
; DISABLED-NOT: @func.2(
; Function under test: a recursive helper that takes a callback as its third
; parameter.
;   %0 - base pointer into an i32 buffer
;   %1 - counter / index
;   %2 - callback invoked through an indirect call
; If %1 < 1 it returns immediately. Otherwise it calls %2 on the address of
; element %0[%1], recurses with %1 - 1, and returns. The return value is
; always 0 and the result of the recursive call is not used.
; NOTE(review): values and blocks are left unnamed on purpose. The expected
; output at the end of this file matches numeric block labels, so giving names
; to values here would presumably shift them - verify before renaming.
define internal i32 @func(i32* %0, i32 %1, void (i32*)* nocapture %2) {
; Keep the counter in a stack slot; each later use reloads it from there.
%4 = alloca i32, align 4
store i32 %1, i32* %4, align 4
%5 = load i32, i32* %4, align 4
; Base case: counter below 1 -> jump straight to the return block.
%6 = icmp slt i32 %5, 1
br i1 %6, label %14, label %7
7: ; preds = %3
; Address of element %0[counter], with the counter sign-extended to i64.
%8 = load i32, i32* %4, align 4
%9 = sext i32 %8 to i64
%10 = getelementptr inbounds i32, i32* %0, i64 %9
; Indirect call through the callback parameter.
call void %2(i32* %10)
; Recurse with counter - 1, forwarding the same buffer and callback.
%11 = load i32, i32* %4, align 4
%12 = add nsw i32 %11, -1
%13 = call i32 @func(i32* %0, i32 %12, void (i32*)* %2)
br label %14
14: ; preds = %3, %7
ret i32 0
}
; Callback passed to @func by @main: adds 1 (nsw) to the i32 that %0 points
; to and writes the result back in place.
define internal void @increment(i32* nocapture %0) {
  %old = load i32, i32* %0, align 4
  %bumped = add nsw i32 %old, 1
  store i32 %bumped, i32* %0, align 4
  ret void
}
; Callback passed to @func by @main: adds -1 (nsw) to the i32 that %0 points
; to and writes the result back in place.
define internal void @decrement(i32* nocapture %0) {
  %old = load i32, i32* %0, align 4
  %lowered = add nsw i32 %old, -1
  store i32 %lowered, i32* %0, align 4
  ret void
}
; Driver for the test: calls @func twice on the same buffer %0, first with
; @increment and then with @decrement as the callback. The second call uses
; the first call's return value as its counter argument, and its own result
; is what @main returns. The two pattern lines below describe the expected
; form of each call site in the tool output.
define i32 @main(i32* %0, i32 %1) {
; CHECK: [[TMP3:%.*]] = call i32 @func.2(i32* [[TMP0:%.*]], i32 [[TMP1:%.*]])
  %inc_ret = call i32 @func(i32* %0, i32 %1, void (i32*)* nonnull @increment)
; CHECK: [[TMP4:%.*]] = call i32 @func.1(i32* [[TMP0]], i32 [[TMP3]])
  %dec_ret = call i32 @func(i32* %0, i32 %inc_ret, void (i32*)* nonnull @decrement)
  ret i32 %dec_ret
}
; CHECK: @func.1(
; CHECK: [[TMP3:%.*]] = alloca i32, align 4
; CHECK: store i32 [[TMP1:%.*]], i32* [[TMP3]], align 4
; CHECK: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
; CHECK: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1
; CHECK: br i1 [[TMP5]], label [[TMP13:%.*]], label [[TMP6:%.*]]
; CHECK: 6:
; CHECK: [[TMP7:%.*]] = load i32, i32* [[TMP3]], align 4
; CHECK: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
; CHECK: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP0:%.*]], i64 [[TMP8]]
; CHECK: call void @decrement(i32* [[TMP9]])
; CHECK: [[TMP10:%.*]] = load i32, i32* [[TMP3]], align 4
; CHECK: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
; CHECK: [[TMP12:%.*]] = call i32 @func.1(i32* [[TMP0]], i32 [[TMP11]])
; CHECK: br label [[TMP13]]
; CHECK: 13:
; CHECK: ret i32 0
;
;
; CHECK: @func.2(
; CHECK: [[TMP3:%.*]] = alloca i32, align 4
; CHECK: store i32 [[TMP1:%.*]], i32* [[TMP3]], align 4
; CHECK: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
; CHECK: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1
; CHECK: br i1 [[TMP5]], label [[TMP13:%.*]], label [[TMP6:%.*]]
; CHECK: 6:
; CHECK: [[TMP7:%.*]] = load i32, i32* [[TMP3]], align 4
; CHECK: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
; CHECK: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP0:%.*]], i64 [[TMP8]]
; CHECK: call void @increment(i32* [[TMP9]])
; CHECK: [[TMP10:%.*]] = load i32, i32* [[TMP3]], align 4
; CHECK: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
; CHECK: [[TMP12:%.*]] = call i32 @func.2(i32* [[TMP0]], i32 [[TMP11]])
; CHECK: br label [[TMP13]]
; CHECK: ret i32 0