1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

[VectorCombine] forward walk through instructions to improve chaining of transforms

This is split off from D79799 - where I was proposing to fully iterate
over a function until there are no more transforms. I suspect we are
still going to want to do something like that eventually.

But we can achieve the same gains much more efficiently on the current
set of regression tests just by reversing the order that we visit the
instructions.

This may also reduce the motivation for D79078, but we are still not
getting the optimal pattern for a reduction.
This commit is contained in:
Sanjay Patel 2020-05-16 13:08:01 -04:00
parent 31896780e0
commit a80619608b
4 changed files with 41 additions and 42 deletions

View File

@ -381,11 +381,10 @@ static bool runImpl(Function &F, const TargetTransformInfo &TTI,
if (!DT.isReachableFromEntry(&BB))
continue;
// Do not delete instructions under here and invalidate the iterator.
// Walk the block backwards for efficiency. We're matching a chain of
// use->defs, so we're more likely to succeed by starting from the bottom.
// Walk the block forwards to enable simple iterative chains of transforms.
// TODO: It could be more efficient to remove dead instructions
// iteratively in this loop rather than waiting until the end.
for (Instruction &I : make_range(BB.rbegin(), BB.rend())) {
for (Instruction &I : BB) {
if (isa<DbgInfoIntrinsic>(I))
continue;
MadeChange |= foldExtractExtract(I, TTI);

View File

@ -5,17 +5,19 @@
target triple = "x86_64--"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; FIXME: This should only need 2 'or' instructions.
define i32 @ext_ext_or_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @ext_ext_or_reduction_v4i32(
; CHECK-NEXT: [[Z:%.*]] = and <4 x i32> [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[Z]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
; CHECK-NEXT: [[Z2:%.*]] = extractelement <4 x i32> [[Z]], i32 2
; CHECK-NEXT: [[Z012:%.*]] = or i32 [[TMP3]], [[Z2]]
; CHECK-NEXT: [[Z3:%.*]] = extractelement <4 x i32> [[Z]], i32 3
; CHECK-NEXT: [[Z0123:%.*]] = or i32 [[Z012]], [[Z3]]
; CHECK-NEXT: ret i32 [[Z0123]]
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> undef, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i32> [[TMP4]], [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
; CHECK-NEXT: ret i32 [[TMP7]]
;
%z = and <4 x i32> %x, %y
%z0 = extractelement <4 x i32> %z, i32 0
@ -32,10 +34,10 @@ define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) {
; CHECK-LABEL: @ext_ext_partial_add_reduction_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[X]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
; CHECK-NEXT: [[X210:%.*]] = add i32 [[TMP3]], [[X2]]
; CHECK-NEXT: ret i32 [[X210]]
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP4]], i64 0
; CHECK-NEXT: ret i32 [[TMP5]]
;
%x0 = extractelement <4 x i32> %x, i32 0
%x1 = extractelement <4 x i32> %x, i32 1
@ -47,14 +49,14 @@ define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) {
define i32 @ext_ext_partial_add_reduction_and_extra_add_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @ext_ext_partial_add_reduction_and_extra_add_v4i32(
; CHECK-NEXT: [[Y1:%.*]] = extractelement <4 x i32> [[Y:%.*]], i32 1
; CHECK-NEXT: [[Y2:%.*]] = extractelement <4 x i32> [[Y]], i32 2
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[Y]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
; CHECK-NEXT: [[Y210:%.*]] = add i32 [[TMP3]], [[Y1]]
; CHECK-NEXT: [[X2Y210:%.*]] = add i32 [[Y210]], [[Y2]]
; CHECK-NEXT: ret i32 [[X2Y210]]
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], [[Y]]
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[TMP1]]
; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
; CHECK-NEXT: ret i32 [[TMP7]]
;
%y0 = extractelement <4 x i32> %y, i32 0
%y1 = extractelement <4 x i32> %y, i32 1

View File

@ -492,12 +492,12 @@ define i32 @ext_ext_or_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-NEXT: [[Z:%.*]] = and <4 x i32> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[Z]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
; CHECK-NEXT: [[Z2:%.*]] = extractelement <4 x i32> [[Z]], i32 2
; CHECK-NEXT: [[Z012:%.*]] = or i32 [[TMP3]], [[Z2]]
; CHECK-NEXT: [[Z3:%.*]] = extractelement <4 x i32> [[Z]], i32 3
; CHECK-NEXT: [[Z0123:%.*]] = or i32 [[Z3]], [[Z012]]
; CHECK-NEXT: ret i32 [[Z0123]]
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> undef, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i32> [[TMP5]], [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP6]], i64 0
; CHECK-NEXT: ret i32 [[TMP7]]
;
%z = and <4 x i32> %x, %y
%z0 = extractelement <4 x i32> %z, i32 0
@ -514,10 +514,10 @@ define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) {
; CHECK-LABEL: @ext_ext_partial_add_reduction_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[X]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
; CHECK-NEXT: [[X210:%.*]] = add i32 [[X2]], [[TMP3]]
; CHECK-NEXT: ret i32 [[X210]]
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP4]], i64 0
; CHECK-NEXT: ret i32 [[TMP5]]
;
%x0 = extractelement <4 x i32> %x, i32 0
%x1 = extractelement <4 x i32> %x, i32 1
@ -531,12 +531,12 @@ define i32 @ext_ext_partial_add_reduction_and_extra_add_v4i32(<4 x i32> %x, <4 x
; CHECK-LABEL: @ext_ext_partial_add_reduction_and_extra_add_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[Y]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
; CHECK-NEXT: [[Y2:%.*]] = extractelement <4 x i32> [[Y]], i32 2
; CHECK-NEXT: [[Y210:%.*]] = add i32 [[Y2]], [[TMP3]]
; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2
; CHECK-NEXT: [[X2Y210:%.*]] = add i32 [[X2]], [[Y210]]
; CHECK-NEXT: ret i32 [[X2Y210]]
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP6]], i64 0
; CHECK-NEXT: ret i32 [[TMP7]]
;
%y0 = extractelement <4 x i32> %y, i32 0
%y1 = extractelement <4 x i32> %y, i32 1

View File

@ -51,11 +51,9 @@ define <2 x i64> @ins1_ins1_xor(i64 %x, i64 %y) {
define <2 x i64> @ins1_ins1_iterate(i64 %w, i64 %x, i64 %y, i64 %z) {
; CHECK-LABEL: @ins1_ins1_iterate(
; CHECK-NEXT: [[S0_SCALAR:%.*]] = sub i64 [[W:%.*]], [[X:%.*]]
; CHECK-NEXT: [[S0:%.*]] = insertelement <2 x i64> undef, i64 [[S0_SCALAR]], i64 1
; CHECK-NEXT: [[I2:%.*]] = insertelement <2 x i64> undef, i64 [[Y:%.*]], i32 1
; CHECK-NEXT: [[S1:%.*]] = or <2 x i64> [[S0]], [[I2]]
; CHECK-NEXT: [[I3:%.*]] = insertelement <2 x i64> undef, i64 [[Z:%.*]], i32 1
; CHECK-NEXT: [[S2:%.*]] = shl <2 x i64> [[I3]], [[S1]]
; CHECK-NEXT: [[S1_SCALAR:%.*]] = or i64 [[S0_SCALAR]], [[Y:%.*]]
; CHECK-NEXT: [[S2_SCALAR:%.*]] = shl i64 [[Z:%.*]], [[S1_SCALAR]]
; CHECK-NEXT: [[S2:%.*]] = insertelement <2 x i64> undef, i64 [[S2_SCALAR]], i64 1
; CHECK-NEXT: ret <2 x i64> [[S2]]
;
%i0 = insertelement <2 x i64> undef, i64 %w, i64 1