llvm-mirror/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll
Guozhi Wei a059f4193c [CodeGenPrepare] Delete intrinsic call to llvm.assume to enable more tail calls
The attached test case is simplified from tcmalloc. Both function calls should be optimized as tail calls, but LLVM can only optimize the first one. The second call cannot be optimized because dupRetToEnableTailCallOpts fails to duplicate the ret into block case2.

Two problems blocked the duplication:

  1. The intrinsic call to llvm.assume is not handled by dupRetToEnableTailCallOpts.
  2. The control flow is more complex than expected: dupRetToEnableTailCallOpts can only duplicate a ret into its immediate predecessor, but here there is an intermediate block between the call and the ret. (Both problems are marked in the sketch below.)
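
A minimal sketch of the shape being described. The block names case1/case2/exit1/exit2 come from the description above; the callee names and everything else are assumptions for illustration, not the actual reduction attached to this review:

  declare void @llvm.assume(i1)
  declare i64 @callee1()
  declare i64 @callee2()

  define i64 @caller(i1 %c) {
  entry:
    br i1 %c, label %case1, label %case2

  case1:
    %r1 = call i64 @callee1()       ; fine: exit2's ret can be duplicated
    br label %exit2                 ; directly into this predecessor

  case2:
    %r2 = call i64 @callee2()
    call void @llvm.assume(i1 %c)   ; problem 1: unhandled intrinsic between
    br label %exit1                 ; the call and the terminator

  exit1:                            ; problem 2: intermediate block between
    br label %exit2                 ; the call and the ret

  exit2:
    %r = phi i64 [ %r1, %case1 ], [ %r2, %exit1 ]
    ret i64 %r
  }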

The solutions:

  1. Since CodeGenPrepare already runs at the very end of the LLVM IR pipeline, we can simply delete the intrinsic call to llvm.assume.
  2. A general solution for the complex control flow is hard, but this case is tractable: once exit2 has been duplicated into case1, exit2 is the only successor of exit1 and exit1 is the only predecessor of exit2, so the two blocks can be combined through eliminateFallThrough. However, that function is currently called too late, and no dupRetToEnableTailCallOpts runs after it. Adding an earlier call to eliminateFallThrough solves the problem (the resulting IR is sketched below).
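
With both changes, CodeGenPrepare can reduce the sketch above to something like the following (hypothetical, using the same assumed names), leaving both calls eligible for tail-call lowering:

  define i64 @caller(i1 %c) {
  entry:
    br i1 %c, label %case1, label %case2

  case1:
    %r1 = tail call i64 @callee1()  ; ret duplicated into the predecessor
    ret i64 %r1

  case2:
    %r2 = tail call i64 @callee2()  ; assume deleted, exit1/exit2 merged by
    ret i64 %r2                     ; eliminateFallThrough, ret duplicated
  }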

Differential Revision: https://reviews.llvm.org/D76539
2020-03-31 11:55:51 -07:00

; RUN: opt -codegenprepare -disable-cgp-branch-opts -S < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; The first cast should be sunk into block2, in order that the
; instruction selector can form an efficient
; i64 * i64 -> i128 multiplication.
define i128 @sink(i64* %mem1, i64* %mem2) {
; CHECK-LABEL: block1:
; CHECK-NEXT: load
block1:
  %l1 = load i64, i64* %mem1
  %s1 = sext i64 %l1 to i128
  br label %block2

; CHECK-LABEL: block2:
; CHECK-NEXT: sext
; CHECK-NEXT: load
; CHECK-NEXT: sext
block2:
  %l2 = load i64, i64* %mem2
  %s2 = sext i64 %l2 to i128
  %res = mul i128 %s1, %s2
  ret i128 %res
}

; The first cast should be hoisted into block1, in order that the
; instruction selector can form an extend-load.
define i64 @hoist(i32* %mem1, i32* %mem2) {
; CHECK-LABEL: block1:
; CHECK-NEXT: load
; CHECK-NEXT: sext
block1:
  %l1 = load i32, i32* %mem1
  br label %block2

; CHECK-LABEL: block2:
; CHECK-NEXT: load
; CHECK-NEXT: sext
block2:
  %s1 = sext i32 %l1 to i64
  %l2 = load i32, i32* %mem2
  %s2 = sext i32 %l2 to i64
  %res = mul i64 %s1, %s2
  ret i64 %res
}

; Make sure the cast sink logic and OptimizeExtUses don't end up in an infinite
; loop.
define i128 @use_ext_source() {
block1:
  %v1 = or i64 undef, undef
  %v2 = zext i64 %v1 to i128
  br i1 undef, label %block2, label %block3

block2:
  %v3 = add i64 %v1, 1
  %v4 = zext i64 %v3 to i128
  br label %block3

block3:
  %res = phi i128 [ %v2, %block1 ], [ %v4, %block2 ]
  ret i128 %res
}