llvm-mirror/test/Transforms/InstCombine/zext.ll
Tobias Grosser 1b0721730e [InstCombine] Refactor optimization of zext(or(icmp, icmp)) to enable more aggressive cast-folding
Summary:
InstCombine unfolds expressions of the form `zext(or(icmp, icmp))` into `or(zext(icmp), zext(icmp))` so that the exposed `zext(icmp)` instructions can be optimized in a later iteration of InstCombine. We now perform this unfolding and the subsequent `zext(icmp)` optimization together in a single step. Since the unfolding no longer happens separately, we also re-enable the folding of `logic(cast(icmp), cast(icmp))` expressions into `cast(logic(icmp, icmp))`, which had been disabled because it interfered with the unfolding transformation.
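
For illustration, here is a minimal sketch in LLVM IR of the input pattern (the function name is hypothetical, not taken from this test file):

```
; zext(or(icmp, icmp)) as seen by visitZExt():
define i32 @zext_or_icmp_sketch(i32 %a, i32 %b) {
  %cmp1 = icmp eq i32 %a, 0
  %cmp2 = icmp eq i32 %b, 0
  %or = or i1 %cmp1, %cmp2
  %res = zext i1 %or to i32
  ret i32 %res
}
; Unfolding rewrites %res into or(zext(%cmp1), zext(%cmp2)), exposing two
; zext(icmp) instructions; with this change they are optimized immediately
; instead of being left for a later InstCombine iteration.
```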

Tested via `make check` and `lnt`.

Background
==========

For a better understanding of how this change came about, we summarize its history below. In commit r275989 we already tried to enable the folding of `logic(cast(icmp), cast(icmp))` into `cast(logic(icmp, icmp))`, which had to be reverted in r276106 because it could lead to an endless loop in InstCombine (also see http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20160718/374347.html). The root of this problem is that `visitZExt()` in InstCombineCasts.cpp also performs the reverse of the above folding transformation: it unfolds `zext(or(icmp, icmp))` into `or(zext(icmp), zext(icmp))` in order to expose `zext(icmp)` operations that subsequent iterations of InstCombine could then eliminate. However, before these `zext(icmp)` instructions were eliminated, the folding from r275989 could kick in and cause InstCombine to switch back and forth endlessly between the folding and the unfolding transformation. This is why we now perform the `zext`-unfolding and the elimination of the exposed `zext(icmp)` instructions in one go, which lets us re-enable the cast-folding of `logic(cast(icmp), cast(icmp))` without entering an endless loop again.
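
To make the interaction concrete, consider a hypothetical input (illustrative only, not part of the test suite) on which the two transformations undo each other:

```
define i32 @pingpong_sketch(i32 %a, i32 %b, i32 %c) {
  %cmp1 = icmp sgt i32 %a, %b
  %cmp2 = icmp slt i32 %a, %c
  %z1 = zext i1 %cmp1 to i32
  %z2 = zext i1 %cmp2 to i32
  %or = or i32 %z1, %z2
  ret i32 %or
}
; cast-folding (r275989): or(zext(icmp), zext(icmp)) -> zext(or(icmp, icmp))
; unfolding (visitZExt):  zext(or(icmp, icmp)) -> or(zext(icmp), zext(icmp))
; Each rewrite recreates the input of the other, so InstCombine would
; alternate between the two forms forever.
```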

Details on the submitted changes
================================

- In `visitZExt()` we combine the unfolding and optimization of `zext` instructions.
- In `transformZExtICmp()` we have to use `Builder->CreateIntCast()` instead of `CastInst::CreateIntegerCast()` to make sure that the new `CastInst` is inserted into a `BasicBlock`. The new calls to `transformZExtICmp()` that we introduce in `visitZExt()` would otherwise trigger the corresponding assertions (in our case this happened, for example, with lnt for the MultiSource/Applications/sqlite3 and SingleSource/Regression/C++/EH/recursive-throw tests). The subsequent call to `replaceInstUsesWith()` is necessary to ensure that the new `CastInst` replaces the `ZExtInst` accordingly.
- In InstCombineAndOrXor.cpp we again allow the folding of casts on `icmp` instructions.
- With the introduced changes, the instruction order in the optimized IR for the zext-or-icmp.ll test case differs.
- The test cases in zext.ll have been adapted from the reverted commits r275989 and r276105.

Reviewers: grosser, majnemer, spatel

Subscribers: eli.friedman, majnemer, llvm-commits

Differential Revision: https://reviews.llvm.org/D22864

Contributed-by: Matthias Reisinger <d412vv1n@gmail.com>
llvm-svn: 277635
2016-08-03 19:30:35 +00:00


; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s

define i64 @test_sext_zext(i16 %A) {
; CHECK-LABEL: @test_sext_zext(
; CHECK-NEXT: [[C2:%.*]] = zext i16 %A to i64
; CHECK-NEXT: ret i64 [[C2]]
;
%c1 = zext i16 %A to i32
%c2 = sext i32 %c1 to i64
ret i64 %c2
}

define <2 x i64> @test2(<2 x i1> %A) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i1> %A, <i1 true, i1 true>
; CHECK-NEXT: [[ZEXT:%.*]] = zext <2 x i1> [[XOR]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[ZEXT]]
;
%xor = xor <2 x i1> %A, <i1 true, i1 true>
%zext = zext <2 x i1> %xor to <2 x i64>
ret <2 x i64> %zext
}

define <2 x i64> @test3(<2 x i64> %A) {
; CHECK-LABEL: @test3(
; CHECK-NEXT: [[AND:%.*]] = and <2 x i64> %A, <i64 23, i64 42>
; CHECK-NEXT: ret <2 x i64> [[AND]]
;
%trunc = trunc <2 x i64> %A to <2 x i32>
%and = and <2 x i32> %trunc, <i32 23, i32 42>
%zext = zext <2 x i32> %and to <2 x i64>
ret <2 x i64> %zext
}

define <2 x i64> @test4(<2 x i64> %A) {
; CHECK-LABEL: @test4(
; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i64> %A, <i64 4294967295, i64 4294967295>
; CHECK-NEXT: [[XOR:%.*]] = and <2 x i64> [[TMP1]], <i64 23, i64 42>
; CHECK-NEXT: ret <2 x i64> [[XOR]]
;
%trunc = trunc <2 x i64> %A to <2 x i32>
%and = and <2 x i32> %trunc, <i32 23, i32 42>
%xor = xor <2 x i32> %and, <i32 23, i32 42>
%zext = zext <2 x i32> %xor to <2 x i64>
ret <2 x i64> %zext
}

define i64 @fold_xor_zext_sandwich(i1 %a) {
; CHECK-LABEL: @fold_xor_zext_sandwich(
; CHECK-NEXT: [[TMP1:%.*]] = xor i1 %a, true
; CHECK-NEXT: [[ZEXT2:%.*]] = zext i1 [[TMP1]] to i64
; CHECK-NEXT: ret i64 [[ZEXT2]]
;
%zext1 = zext i1 %a to i32
%xor = xor i32 %zext1, 1
%zext2 = zext i32 %xor to i64
ret i64 %zext2
}

define <2 x i64> @fold_xor_zext_sandwich_vec(<2 x i1> %a) {
; CHECK-LABEL: @fold_xor_zext_sandwich_vec(
; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> %a, <i1 true, i1 true>
; CHECK-NEXT: [[ZEXT2:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[ZEXT2]]
;
%zext1 = zext <2 x i1> %a to <2 x i32>
%xor = xor <2 x i32> %zext1, <i32 1, i32 1>
%zext2 = zext <2 x i32> %xor to <2 x i64>
ret <2 x i64> %zext2
}

; Assert that zexts in and(zext(icmp), zext(icmp)) can be folded.
; CHECK-LABEL: @fold_and_zext_icmp(
; CHECK-NEXT: [[ICMP1:%.*]] = icmp sgt i64 %a, %b
; CHECK-NEXT: [[ICMP2:%.*]] = icmp slt i64 %a, %c
; CHECK-NEXT: [[AND:%.*]] = and i1 [[ICMP1]], [[ICMP2]]
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[AND]] to i8
; CHECK-NEXT: ret i8 [[ZEXT]]
define i8 @fold_and_zext_icmp(i64 %a, i64 %b, i64 %c) {
%1 = icmp sgt i64 %a, %b
%2 = zext i1 %1 to i8
%3 = icmp slt i64 %a, %c
%4 = zext i1 %3 to i8
%5 = and i8 %2, %4
ret i8 %5
}

; Assert that zexts in or(zext(icmp), zext(icmp)) can be folded.
; CHECK-LABEL: @fold_or_zext_icmp(
; CHECK-NEXT: [[ICMP1:%.*]] = icmp sgt i64 %a, %b
; CHECK-NEXT: [[ICMP2:%.*]] = icmp slt i64 %a, %c
; CHECK-NEXT: [[OR:%.*]] = or i1 [[ICMP1]], [[ICMP2]]
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[OR]] to i8
; CHECK-NEXT: ret i8 [[ZEXT]]
define i8 @fold_or_zext_icmp(i64 %a, i64 %b, i64 %c) {
%1 = icmp sgt i64 %a, %b
%2 = zext i1 %1 to i8
%3 = icmp slt i64 %a, %c
%4 = zext i1 %3 to i8
%5 = or i8 %2, %4
ret i8 %5
}

; Assert that zexts in xor(zext(icmp), zext(icmp)) can be folded.
; CHECK-LABEL: @fold_xor_zext_icmp(
; CHECK-NEXT: [[ICMP1:%.*]] = icmp sgt i64 %a, %b
; CHECK-NEXT: [[ICMP2:%.*]] = icmp slt i64 %a, %c
; CHECK-NEXT: [[XOR:%.*]] = xor i1 [[ICMP1]], [[ICMP2]]
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[XOR]] to i8
; CHECK-NEXT: ret i8 [[ZEXT]]
define i8 @fold_xor_zext_icmp(i64 %a, i64 %b, i64 %c) {
%1 = icmp sgt i64 %a, %b
%2 = zext i1 %1 to i8
%3 = icmp slt i64 %a, %c
%4 = zext i1 %3 to i8
%5 = xor i8 %2, %4
ret i8 %5
}

; Assert that zexts in logic(zext(icmp), zext(icmp)) are also folded across
; nested logical operators.
; CHECK-LABEL: @fold_nested_logic_zext_icmp(
; CHECK-NEXT: [[ICMP1:%.*]] = icmp sgt i64 %a, %b
; CHECK-NEXT: [[ICMP2:%.*]] = icmp slt i64 %a, %c
; CHECK-NEXT: [[AND:%.*]] = and i1 [[ICMP1]], [[ICMP2]]
; CHECK-NEXT: [[ICMP3:%.*]] = icmp eq i64 %a, %d
; CHECK-NEXT: [[OR:%.*]] = or i1 [[AND]], [[ICMP3]]
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[OR]] to i8
; CHECK-NEXT: ret i8 [[ZEXT]]
define i8 @fold_nested_logic_zext_icmp(i64 %a, i64 %b, i64 %c, i64 %d) {
%1 = icmp sgt i64 %a, %b
%2 = zext i1 %1 to i8
%3 = icmp slt i64 %a, %c
%4 = zext i1 %3 to i8
%5 = and i8 %2, %4
%6 = icmp eq i64 %a, %d
%7 = zext i1 %6 to i8
%8 = or i8 %5, %7
ret i8 %8
}