Mirror of https://github.com/RPCS3/llvm-mirror.git, last synced 2024-10-19 11:02:59 +02:00.
[VectorCombine] forward walk through instructions to improve chaining of transforms
This is split off from D79799 - where I was proposing to fully iterate over a function until there are no more transforms. I suspect we are still going to want to do something like that eventually. But we can achieve the same gains much more efficiently on the current set of regression tests just by reversing the order that we visit the instructions. This may also reduce the motivation for D79078, but we are still not getting the optimal pattern for a reduction.
This commit is contained in:
parent commit: 31896780e0
this commit: a80619608b
@ -381,11 +381,10 @@ static bool runImpl(Function &F, const TargetTransformInfo &TTI,
|
||||
if (!DT.isReachableFromEntry(&BB))
|
||||
continue;
|
||||
// Do not delete instructions under here and invalidate the iterator.
|
||||
// Walk the block backwards for efficiency. We're matching a chain of
|
||||
// use->defs, so we're more likely to succeed by starting from the bottom.
|
||||
// Walk the block forwards to enable simple iterative chains of transforms.
|
||||
// TODO: It could be more efficient to remove dead instructions
|
||||
// iteratively in this loop rather than waiting until the end.
|
||||
for (Instruction &I : make_range(BB.rbegin(), BB.rend())) {
|
||||
for (Instruction &I : BB) {
|
||||
if (isa<DbgInfoIntrinsic>(I))
|
||||
continue;
|
||||
MadeChange |= foldExtractExtract(I, TTI);
|
||||
|
@ -5,17 +5,19 @@
|
||||
target triple = "x86_64--"
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
; FIXME: This should only need 2 'or' instructions.
|
||||
|
||||
define i32 @ext_ext_or_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {
|
||||
; CHECK-LABEL: @ext_ext_or_reduction_v4i32(
|
||||
; CHECK-NEXT: [[Z:%.*]] = and <4 x i32> [[Y:%.*]], [[X:%.*]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[Z]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
|
||||
; CHECK-NEXT: [[Z2:%.*]] = extractelement <4 x i32> [[Z]], i32 2
|
||||
; CHECK-NEXT: [[Z012:%.*]] = or i32 [[TMP3]], [[Z2]]
|
||||
; CHECK-NEXT: [[Z3:%.*]] = extractelement <4 x i32> [[Z]], i32 3
|
||||
; CHECK-NEXT: [[Z0123:%.*]] = or i32 [[Z012]], [[Z3]]
|
||||
; CHECK-NEXT: ret i32 [[Z0123]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> [[TMP2]], [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> undef, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i32> [[TMP4]], [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
|
||||
; CHECK-NEXT: ret i32 [[TMP7]]
|
||||
;
|
||||
%z = and <4 x i32> %x, %y
|
||||
%z0 = extractelement <4 x i32> %z, i32 0
|
||||
@ -32,10 +34,10 @@ define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) {
|
||||
; CHECK-LABEL: @ext_ext_partial_add_reduction_v4i32(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[X]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
|
||||
; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
|
||||
; CHECK-NEXT: [[X210:%.*]] = add i32 [[TMP3]], [[X2]]
|
||||
; CHECK-NEXT: ret i32 [[X210]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP4]], i64 0
|
||||
; CHECK-NEXT: ret i32 [[TMP5]]
|
||||
;
|
||||
%x0 = extractelement <4 x i32> %x, i32 0
|
||||
%x1 = extractelement <4 x i32> %x, i32 1
|
||||
@ -47,14 +49,14 @@ define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) {
|
||||
|
||||
define i32 @ext_ext_partial_add_reduction_and_extra_add_v4i32(<4 x i32> %x, <4 x i32> %y) {
|
||||
; CHECK-LABEL: @ext_ext_partial_add_reduction_and_extra_add_v4i32(
|
||||
; CHECK-NEXT: [[Y1:%.*]] = extractelement <4 x i32> [[Y:%.*]], i32 1
|
||||
; CHECK-NEXT: [[Y2:%.*]] = extractelement <4 x i32> [[Y]], i32 2
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[Y]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
|
||||
; CHECK-NEXT: [[Y210:%.*]] = add i32 [[TMP3]], [[Y1]]
|
||||
; CHECK-NEXT: [[X2Y210:%.*]] = add i32 [[Y210]], [[Y2]]
|
||||
; CHECK-NEXT: ret i32 [[X2Y210]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], [[Y]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
|
||||
; CHECK-NEXT: ret i32 [[TMP7]]
|
||||
;
|
||||
%y0 = extractelement <4 x i32> %y, i32 0
|
||||
%y1 = extractelement <4 x i32> %y, i32 1
|
||||
|
@ -492,12 +492,12 @@ define i32 @ext_ext_or_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {
|
||||
; CHECK-NEXT: [[Z:%.*]] = and <4 x i32> [[X:%.*]], [[Y:%.*]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[Z]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
|
||||
; CHECK-NEXT: [[Z2:%.*]] = extractelement <4 x i32> [[Z]], i32 2
|
||||
; CHECK-NEXT: [[Z012:%.*]] = or i32 [[TMP3]], [[Z2]]
|
||||
; CHECK-NEXT: [[Z3:%.*]] = extractelement <4 x i32> [[Z]], i32 3
|
||||
; CHECK-NEXT: [[Z0123:%.*]] = or i32 [[Z3]], [[Z012]]
|
||||
; CHECK-NEXT: ret i32 [[Z0123]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> [[TMP2]], [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> undef, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i32> [[TMP5]], [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP6]], i64 0
|
||||
; CHECK-NEXT: ret i32 [[TMP7]]
|
||||
;
|
||||
%z = and <4 x i32> %x, %y
|
||||
%z0 = extractelement <4 x i32> %z, i32 0
|
||||
@ -514,10 +514,10 @@ define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) {
|
||||
; CHECK-LABEL: @ext_ext_partial_add_reduction_v4i32(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[X]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
|
||||
; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
|
||||
; CHECK-NEXT: [[X210:%.*]] = add i32 [[X2]], [[TMP3]]
|
||||
; CHECK-NEXT: ret i32 [[X210]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP4]], i64 0
|
||||
; CHECK-NEXT: ret i32 [[TMP5]]
|
||||
;
|
||||
%x0 = extractelement <4 x i32> %x, i32 0
|
||||
%x1 = extractelement <4 x i32> %x, i32 1
|
||||
@ -531,12 +531,12 @@ define i32 @ext_ext_partial_add_reduction_and_extra_add_v4i32(<4 x i32> %x, <4 x
|
||||
; CHECK-LABEL: @ext_ext_partial_add_reduction_and_extra_add_v4i32(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[Y]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
|
||||
; CHECK-NEXT: [[Y2:%.*]] = extractelement <4 x i32> [[Y]], i32 2
|
||||
; CHECK-NEXT: [[Y210:%.*]] = add i32 [[Y2]], [[TMP3]]
|
||||
; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2
|
||||
; CHECK-NEXT: [[X2Y210:%.*]] = add i32 [[X2]], [[Y210]]
|
||||
; CHECK-NEXT: ret i32 [[X2Y210]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP6]], i64 0
|
||||
; CHECK-NEXT: ret i32 [[TMP7]]
|
||||
;
|
||||
%y0 = extractelement <4 x i32> %y, i32 0
|
||||
%y1 = extractelement <4 x i32> %y, i32 1
|
||||
|
@ -51,11 +51,9 @@ define <2 x i64> @ins1_ins1_xor(i64 %x, i64 %y) {
|
||||
define <2 x i64> @ins1_ins1_iterate(i64 %w, i64 %x, i64 %y, i64 %z) {
|
||||
; CHECK-LABEL: @ins1_ins1_iterate(
|
||||
; CHECK-NEXT: [[S0_SCALAR:%.*]] = sub i64 [[W:%.*]], [[X:%.*]]
|
||||
; CHECK-NEXT: [[S0:%.*]] = insertelement <2 x i64> undef, i64 [[S0_SCALAR]], i64 1
|
||||
; CHECK-NEXT: [[I2:%.*]] = insertelement <2 x i64> undef, i64 [[Y:%.*]], i32 1
|
||||
; CHECK-NEXT: [[S1:%.*]] = or <2 x i64> [[S0]], [[I2]]
|
||||
; CHECK-NEXT: [[I3:%.*]] = insertelement <2 x i64> undef, i64 [[Z:%.*]], i32 1
|
||||
; CHECK-NEXT: [[S2:%.*]] = shl <2 x i64> [[I3]], [[S1]]
|
||||
; CHECK-NEXT: [[S1_SCALAR:%.*]] = or i64 [[S0_SCALAR]], [[Y:%.*]]
|
||||
; CHECK-NEXT: [[S2_SCALAR:%.*]] = shl i64 [[Z:%.*]], [[S1_SCALAR]]
|
||||
; CHECK-NEXT: [[S2:%.*]] = insertelement <2 x i64> undef, i64 [[S2_SCALAR]], i64 1
|
||||
; CHECK-NEXT: ret <2 x i64> [[S2]]
|
||||
;
|
||||
%i0 = insertelement <2 x i64> undef, i64 %w, i64 1
|
||||
|
Loading…
Reference in New Issue
Block a user