1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-20 19:42:54 +02:00

X86: when constructing VZEXT_LOAD from other loads, make sure its output

chain is correctly set up.

As an example, if the original load must happen before later stores, we need
to make sure the constructed VZEXT_LOAD is constrained to be before the stores.

rdar://12684358

llvm-svn: 167859
This commit is contained in:
Manman Ren 2012-11-13 19:13:05 +00:00
parent 9c5e333c90
commit e98ec5dd77
2 changed files with 63 additions and 0 deletions

View File

@ -14203,6 +14203,18 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
Ld->getAlignment(),
false/*isVolatile*/, true/*ReadMem*/,
false/*WriteMem*/);
// Make sure the newly-created LOAD is in the same position as Ld in
// terms of dependency. We create a TokenFactor for Ld and ResNode,
// and update uses of Ld's output chain to use the TokenFactor.
if (Ld->hasAnyUseOfValue(1)) {
SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
SDValue(Ld, 1), SDValue(ResNode.getNode(), 1));
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewChain);
DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(Ld, 1),
SDValue(ResNode.getNode(), 1));
}
return DAG.getNode(ISD::BITCAST, dl, VT, ResNode);
}
}

View File

@ -246,3 +246,54 @@ define <8 x float> @test19(<8 x float> %A, <8 x float>%B) nounwind {
ret <8 x float>%S
}
; rdar://12684358
; Make sure loads happen before stores.
; CHECK: swap8doubles
; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}}
; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}}
; CHECK: vmovaps %xmm{{[0-9]+}}, {{[0-9]*}}(%rdi)
; CHECK: vextractf128
; CHECK: vmovaps %xmm{{[0-9]+}}, {{[0-9]*}}(%rdi)
; CHECK: vextractf128
; CHECK: vmovaps %ymm{{[0-9]+}}, {{[0-9]*}}(%rsi)
; CHECK: vmovaps %ymm{{[0-9]+}}, {{[0-9]*}}(%rsi)
; Regression test body: exchanges eight doubles between %A and %C.
; Every load from %A and %C is issued before any store, and the stores to %A
; reuse the exact pointers the earlier %A loads read from, so the generated
; code must keep those loads ahead of the stores.
define void @swap8doubles(double* nocapture %A, double* nocapture %C) nounwind uwtable ssp {
entry:
; Read A[0..1] and A[2..3] as two unaligned <2 x double> values.
%add.ptr = getelementptr inbounds double* %A, i64 2
%v.i = bitcast double* %A to <2 x double>*
%0 = load <2 x double>* %v.i, align 1
; Widen to <4 x double>: lanes 0-1 come from the load, lanes 2-3 both pick
; element 0 of the second operand, i.e. 0.0 (a zero-extended vector load).
%shuffle.i.i = shufflevector <2 x double> %0, <2 x double> <double 0.000000e+00, double undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
%v1.i = bitcast double* %add.ptr to <2 x double>*
%1 = load <2 x double>* %v1.i, align 1
; Combine with the second pair via vinsertf128, immediate 1 (the upper
; 128-bit half in AVX).
%2 = tail call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %shuffle.i.i, <2 x double> %1, i8 1) nounwind
; Same pattern for A[4..5] and A[6..7].
%add.ptr1 = getelementptr inbounds double* %A, i64 6
%add.ptr2 = getelementptr inbounds double* %A, i64 4
%v.i27 = bitcast double* %add.ptr2 to <2 x double>*
%3 = load <2 x double>* %v.i27, align 1
%shuffle.i.i28 = shufflevector <2 x double> %3, <2 x double> <double 0.000000e+00, double undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
%v1.i29 = bitcast double* %add.ptr1 to <2 x double>*
%4 = load <2 x double>* %v1.i29, align 1
%5 = tail call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %shuffle.i.i28, <2 x double> %4, i8 1) nounwind
; Read C[0..3] and C[4..7] as two 32-byte-aligned <4 x double> values.
%6 = bitcast double* %C to <4 x double>*
%7 = load <4 x double>* %6, align 32
%add.ptr5 = getelementptr inbounds double* %C, i64 4
%8 = bitcast double* %add.ptr5 to <4 x double>*
%9 = load <4 x double>* %8, align 32
; Split each C vector into its low half (shufflevector lanes 0-1) and the
; half selected by vextractf128 with immediate 1.
%shuffle.i26 = shufflevector <4 x double> %7, <4 x double> undef, <2 x i32> <i32 0, i32 1>
%10 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %7, i8 1)
%shuffle.i = shufflevector <4 x double> %9, <4 x double> undef, <2 x i32> <i32 0, i32 1>
%11 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %9, i8 1)
; Write the four C halves over the four A locations that were loaded above.
store <2 x double> %shuffle.i26, <2 x double>* %v.i, align 16
store <2 x double> %10, <2 x double>* %v1.i, align 16
store <2 x double> %shuffle.i, <2 x double>* %v.i27, align 16
store <2 x double> %11, <2 x double>* %v1.i29, align 16
; Write the two vectors assembled from A over C.
store <4 x double> %2, <4 x double>* %6, align 32
store <4 x double> %5, <4 x double>* %8, align 32
ret void
}
declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone
declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone