mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
68ac397f7c
If a loaded value is replicated, it is best to combine the two operations into a VLREP (load and replicate), but isel will not produce this if the load has other users as well. This patch handles that case by making the other users of the load use element 0 of the REPLICATE node instead of the load itself. This way the load has the REPLICATE node as its only user, and we get a VLREP. Review: Ulrich Weigand. https://reviews.llvm.org/D54264 llvm-svn: 346746
57 lines
2.2 KiB
LLVM
57 lines
2.2 KiB
LLVM
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s

; Test that a replicate of a load gets folded to vlrep also in cases where
; the load has multiple users.

; CHECK-NOT: vrep


; @fun - regression input for replicating a loaded scalar whose value also
; has other users.
;
; Parameters:
;   %Vsrc - pointer to doubles; elements 0 through 5 are loaded.
;   %T    - <2 x double> addend applied to each of the four vector products.
; Returns:
;   Lane 0 of the summed vector results plus the scalar term %Vadd38.
;
; Use pattern of the loaded values (this is what the test depends on, so the
; instruction sequence must not be reordered or simplified):
;   %Vld1 - inserted into %V19, which is both splatted (%V20) and extended
;           with %Vld2 (%V25).
;   %Vld2 - splatted (%V30/%V31) and also inserted into lane 1 of %V25.
;   %Vld3 - splatted (%V36/%V37) and also used by the scalar fmul %Vmul37.
;   %Vld6 - placed in both lanes of %V27, in lane 1 of %V33, and used by the
;           scalar fmul %Vmul37.
define double @fun(double* %Vsrc, <2 x double> %T) {
entry:
  ; Load six consecutive doubles, %Vsrc[0] .. %Vsrc[5].
  %Vgep1 = getelementptr double, double* %Vsrc, i64 0
  %Vld1 = load double, double* %Vgep1
  %Vgep2 = getelementptr double, double* %Vsrc, i64 1
  %Vld2 = load double, double* %Vgep2
  %Vgep3 = getelementptr double, double* %Vsrc, i64 2
  %Vld3 = load double, double* %Vgep3
  %Vgep4 = getelementptr double, double* %Vsrc, i64 3
  %Vld4 = load double, double* %Vgep4
  %Vgep5 = getelementptr double, double* %Vsrc, i64 4
  %Vld5 = load double, double* %Vgep5
  %Vgep6 = getelementptr double, double* %Vsrc, i64 5
  %Vld6 = load double, double* %Vgep6

  ; %V20 = <Vld1, Vld1>: the all-zero shuffle mask picks element 0 of %V19
  ; for both lanes.
  %V19 = insertelement <2 x double> undef, double %Vld1, i32 0
  %V20 = shufflevector <2 x double> %V19, <2 x double> undef, <2 x i32> zeroinitializer
  ; %V22 = <Vld4, Vld5>; it is reused below by %V38.
  %V21 = insertelement <2 x double> undef, double %Vld4, i32 0
  %V22 = insertelement <2 x double> %V21, double %Vld5, i32 1
  %V23 = fmul <2 x double> %V20, %V22
  %V24 = fadd <2 x double> %T, %V23
  ; %V25 = <Vld1, Vld2>: a second, non-splat user of %V19 (and so of %Vld1).
  %V25 = insertelement <2 x double> %V19, double %Vld2, i32 1
  ; %V27 = <Vld6, Vld6>, built with two inserts rather than a shuffle.
  %V26 = insertelement <2 x double> undef, double %Vld6, i32 0
  %V27 = insertelement <2 x double> %V26, double %Vld6, i32 1
  %V28 = fmul <2 x double> %V25, %V27
  %V29 = fadd <2 x double> %T, %V28
  ; %V31 = <Vld2, Vld2>; %Vld2 is also used by %V25 above.
  %V30 = insertelement <2 x double> undef, double %Vld2, i32 0
  %V31 = shufflevector <2 x double> %V30, <2 x double> undef, <2 x i32> zeroinitializer
  ; %V33 = <Vld5, Vld6>.
  %V32 = insertelement <2 x double> undef, double %Vld5, i32 0
  %V33 = insertelement <2 x double> %V32, double %Vld6, i32 1
  %V34 = fmul <2 x double> %V31, %V33
  %V35 = fadd <2 x double> %T, %V34
  ; %V37 = <Vld3, Vld3>; %Vld3 is also used by the scalar multiply below.
  %V36 = insertelement <2 x double> undef, double %Vld3, i32 0
  %V37 = shufflevector <2 x double> %V36, <2 x double> undef, <2 x i32> zeroinitializer
  %V38 = fmul <2 x double> %V37, %V22
  %V39 = fadd <2 x double> %T, %V38
  ; Scalar users of %Vld3 and %Vld6.
  %Vmul37 = fmul double %Vld3, %Vld6
  %Vadd38 = fadd double %Vmul37, %Vmul37

  ; Sum the four vector results so that all of them stay live.
  %VA0 = fadd <2 x double> %V24, %V29
  %VA1 = fadd <2 x double> %VA0, %V35
  %VA2 = fadd <2 x double> %VA1, %V39

  ; Combine lane 0 of the vector sum with the scalar term.
  %VE0 = extractelement <2 x double> %VA2, i32 0
  %VS1 = fadd double %VE0, %Vadd38

  ret double %VS1
}
|