1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00
Nikita Popov 730509657a [InstCombine] DCE instructions earlier
When InstCombine initially populates the worklist, it already
performs constant folding and DCE. However, as the instructions
are initially visited in program order, this DCE can pick up only
the last instruction of a dead chain, the rest would only get
picked up in the main InstCombine run.

To avoid this, we instead perform the DCE in separate pass over the
collected instructions in reverse order, which will allow us to
pick up full dead instruction chains. We already need to do this
reverse iteration anyway to populate the worklist, so this
shouldn't add extra cost.

This by itself only fixes a small part of the problem though:
The same basic issue also applies during the main InstCombine loop.
We generally always want DCE to occur as early as possible,
because it will allow one-use folds to happen. Address this by also
performing DCE while adding deferred instructions to the main worklist.

This drops the number of tests that perform more than 2 InstCombine
iterations from ~80 to ~40. There's some spurious test changes due
to operand order / icmp toggling.

Differential Revision: https://reviews.llvm.org/D75008
2020-02-27 18:45:59 +01:00

64 lines
2.5 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
define <4 x i32> @vec_select(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @vec_select(
; CHECK-NEXT: [[SUB:%.*]] = sub nsw <4 x i32> zeroinitializer, [[A:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[B:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A]], <4 x i32> [[SUB]]
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
;
%cmp = icmp slt <4 x i32> %b, zeroinitializer
%sext = sext <4 x i1> %cmp to <4 x i32>
%sub = sub nsw <4 x i32> zeroinitializer, %a
%t0 = icmp slt <4 x i32> %sext, zeroinitializer
%sext3 = sext <4 x i1> %t0 to <4 x i32>
%t1 = xor <4 x i32> %sext3, <i32 -1, i32 -1, i32 -1, i32 -1>
%t2 = and <4 x i32> %a, %t1
%t3 = and <4 x i32> %sext3, %sub
%cond = or <4 x i32> %t2, %t3
ret <4 x i32> %cond
}
define <4 x i32> @vec_select_alternate_sign_bit_test(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @vec_select_alternate_sign_bit_test(
; CHECK-NEXT: [[SUB:%.*]] = sub nsw <4 x i32> zeroinitializer, [[A:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[B:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[SUB]], <4 x i32> [[A]]
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
;
%cmp = icmp sgt <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
%sext = sext <4 x i1> %cmp to <4 x i32>
%sub = sub nsw <4 x i32> zeroinitializer, %a
%t0 = icmp slt <4 x i32> %sext, zeroinitializer
%sext3 = sext <4 x i1> %t0 to <4 x i32>
%t1 = xor <4 x i32> %sext3, <i32 -1, i32 -1, i32 -1, i32 -1>
%t2 = and <4 x i32> %a, %t1
%t3 = and <4 x i32> %sext3, %sub
%cond = or <4 x i32> %t2, %t3
ret <4 x i32> %cond
}
define <2 x i32> @is_negative_undef_elt(<2 x i32> %a) {
; CHECK-LABEL: @is_negative_undef_elt(
; CHECK-NEXT: [[A_LOBIT:%.*]] = ashr <2 x i32> [[A:%.*]], <i32 31, i32 31>
; CHECK-NEXT: ret <2 x i32> [[A_LOBIT]]
;
%cmp = icmp slt <2 x i32> %a, <i32 0, i32 undef>
%sext = sext <2 x i1> %cmp to <2 x i32>
ret <2 x i32> %sext
}
define <2 x i32> @is_positive_undef_elt(<2 x i32> %a) {
; CHECK-LABEL: @is_positive_undef_elt(
; CHECK-NEXT: [[A_LOBIT:%.*]] = ashr <2 x i32> [[A:%.*]], <i32 31, i32 31>
; CHECK-NEXT: [[A_LOBIT_NOT:%.*]] = xor <2 x i32> [[A_LOBIT]], <i32 -1, i32 -1>
; CHECK-NEXT: ret <2 x i32> [[A_LOBIT_NOT]]
;
%cmp = icmp sgt <2 x i32> %a, <i32 undef, i32 -1>
%sext = sext <2 x i1> %cmp to <2 x i32>
ret <2 x i32> %sext
}