1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 04:32:44 +01:00

[DAGCombiner] Fix visitSTORE to continue processing current SDNode, if findBetterNeighborChains doesn't actually CombineTo it.

Summary:
findBetterNeighborChains may or may not find a better chain for each node it examines, which includes the node ("St") that visitSTORE is currently processing. If no better chain is found for St, visitSTORE should continue processing St instead of returning SDValue(St, 0) as if St had been replaced via CombineTo.

This fixes bug 28130. There might be other ways to make the test pass (see D21409). I think both of the patches are fixing actual bugs revealed by the same testcase.

Reviewers: echristo, wschmidt, hfinkel, kbarton, amehsan, arsenm, nemanjai, bogner

Subscribers: mehdi_amini, nemanjai, llvm-commits

Differential Revision: http://reviews.llvm.org/D21692

llvm-svn: 274644
This commit is contained in:
Tim Shen 2016-07-06 17:44:03 +00:00
parent d59d4568d1
commit ff6926ff3e
2 changed files with 82 additions and 6 deletions

View File

@ -392,8 +392,12 @@ namespace {
/// chain (aliasing node.)
SDValue FindBetterChain(SDNode *N, SDValue Chain);
/// Do FindBetterChain for a store and any possibly adjacent stores on
/// consecutive chains.
/// Try to replace a store and any possibly adjacent stores on
/// consecutive chains with better chains. Return true only if St is
/// replaced.
///
/// Notice that other chains may still be replaced even if the function
/// returns false.
bool findBetterNeighborChains(StoreSDNode *St);
/// Match "(X shl/srl V1) & V2" where V2 may not be present.
@ -12102,6 +12106,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// manipulation. Return the original node to not do anything else.
return SDValue(ST, 0);
}
Chain = ST->getChain();
}
// Try transforming N to an indexed store.
@ -14950,7 +14955,7 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
}
bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) {
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
@ -15010,7 +15015,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) {
}
}
bool MadeChange = false;
bool MadeChangeToSt = false;
SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
for (StoreSDNode *ChainedStore : ChainedStores) {
@ -15018,7 +15023,8 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) {
SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
if (Chain != BetterChain) {
MadeChange = true;
if (ChainedStore == St)
MadeChangeToSt = true;
BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
}
}
@ -15028,7 +15034,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) {
for (auto Replacement : BetterChains)
replaceStoreChain(Replacement.first, Replacement.second);
return MadeChange;
return MadeChangeToSt;
}
/// This is the entry point for the file.

View File

@ -0,0 +1,70 @@
; RUN: llc -O0 < %s | FileCheck %s
target triple = "powerpc64le-unknown-linux-gnu"
%StructA = type { double, double, double, double, double, double, double, double }
; Regression test accompanying the DAGCombiner visitSTORE fix (bug 28130 per
; the commit message). The function loads fields of %StructA, mixes scalar and
; <2 x double> arithmetic, and then issues three scalar stores (fields 4, 6, 7)
; followed by one <2 x double> store starting at field 2.
define void @Test(%StructA* %tmp) unnamed_addr #0 align 2 {
; Expected assembly: four lxvd2x instructions, each immediately followed by an
; xxswapd, and later an xxswapd whose captured operand ([[OUTPUT]]) is the
; operand of the stxvd2x on the very next line.
; CHECK-LABEL: Test:
; CHECK: lxvd2x
; CHECK-NEXT: xxswapd
; CHECK: lxvd2x
; CHECK-NEXT: xxswapd
; CHECK: lxvd2x
; CHECK-NEXT: xxswapd
; CHECK: lxvd2x
; CHECK-NEXT: xxswapd
; CHECK: xxswapd [[OUTPUT:[0-9]+]]
; CHECK-NEXT: stxvd2x [[OUTPUT]]
bb:
; Field addresses: %tmp2 = field 0, %tmp5 = field 2, %tmp9 = field 4,
; %tmp11 = field 5, %tmp13 = field 6, %tmp15 = field 7.
; %tmp4 is defined here but not referenced again in this function.
%tmp2 = getelementptr inbounds %StructA, %StructA* %tmp, i64 0, i32 0
%tmp4 = bitcast %StructA* %tmp to <2 x double>*
%tmp5 = getelementptr inbounds %StructA, %StructA* %tmp, i64 0, i32 2
%tmp9 = getelementptr inbounds %StructA, %StructA* %tmp, i64 0, i32 4
%tmp11 = getelementptr inbounds %StructA, %StructA* %tmp, i64 0, i32 5
%tmp13 = getelementptr inbounds %StructA, %StructA* %tmp, i64 0, i32 6
%tmp15 = getelementptr inbounds %StructA, %StructA* %tmp, i64 0, i32 7
; Scalar loads of fields 0, 5 and 4; %tmp18 (field 0) is the common multiplier
; used by every fmul below.
%tmp18 = load double, double* %tmp2, align 16
%tmp19 = load double, double* %tmp11, align 8
%tmp20 = load double, double* %tmp9, align 16
%tmp21 = fsub double 1.210000e+04, %tmp20
%tmp22 = fmul double %tmp18, %tmp21
%tmp23 = fadd double %tmp20, %tmp22
%tmp24 = load double, double* %tmp13, align 16
%tmp25 = fsub double 1.000000e+02, %tmp24
%tmp26 = fmul double %tmp18, %tmp25
%tmp27 = fadd double %tmp24, %tmp26
%tmp28 = load double, double* %tmp15, align 8
; %tmp30 = <field 5, field 7> assembled from the two scalar loads.
%tmp29 = insertelement <2 x double> undef, double %tmp19, i32 0
%tmp30 = insertelement <2 x double> %tmp29, double %tmp28, i32 1
%tmp31 = fsub <2 x double> <double 1.100000e+04, double 1.100000e+02>, %tmp30
; %tmp33 = %tmp18 placed in both vector lanes.
%tmp32 = insertelement <2 x double> undef, double %tmp18, i32 0
%tmp33 = insertelement <2 x double> %tmp32, double %tmp18, i32 1
%tmp34 = fmul <2 x double> %tmp33, %tmp31
%tmp35 = fadd <2 x double> %tmp30, %tmp34
; Vector load through the field-2 address reinterpreted as <2 x double>*.
%tmp36 = bitcast double* %tmp5 to <2 x double>*
%tmp37 = load <2 x double>, <2 x double>* %tmp36, align 16
%tmp38 = fsub <2 x double> <double 1.000000e+00, double 1.000000e+04>, %tmp37
%tmp39 = fmul <2 x double> %tmp33, %tmp38
%tmp40 = fadd <2 x double> %tmp37, %tmp39
%tmp41 = fsub <2 x double> <double 1.000000e+00, double 1.000000e+04>, %tmp40
%tmp42 = fmul <2 x double> %tmp33, %tmp41
%tmp43 = fadd <2 x double> %tmp40, %tmp42
%tmp44 = fsub <2 x double> <double 1.200000e+04, double 1.200000e+02>, %tmp35
%tmp45 = fmul <2 x double> %tmp33, %tmp44
%tmp46 = fadd <2 x double> %tmp35, %tmp45
%tmp48 = fsub double 1.440000e+04, %tmp23
%tmp49 = fmul double %tmp18, %tmp48
%tmp50 = fadd double %tmp23, %tmp49
; Scalar store to field 4.
store double %tmp50, double* %tmp9, align 16
%tmp51 = fsub double 1.000000e+02, %tmp27
%tmp52 = fmul double %tmp18, %tmp51
%tmp53 = fadd double %tmp27, %tmp52
; Scalar store to field 6.
store double %tmp53, double* %tmp13, align 16
; Scalar store of lane 1 of %tmp46 to field 7.
%tmp54 = extractelement <2 x double> %tmp46, i32 1
store double %tmp54, double* %tmp15, align 8
; Final <2 x double> store through the field-2 address; presumably this is the
; store the trailing xxswapd/stxvd2x expectation above refers to (TODO confirm).
%tmp55 = bitcast double* %tmp5 to <2 x double>*
store <2 x double> %tmp43, <2 x double>* %tmp55, align 16
ret void
}
attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }