1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 04:32:44 +01:00
llvm-mirror/test/CodeGen/SystemZ/vec-move-21.ll
Jonas Paulsson 68ac397f7c [SystemZ] Increase the number of VLREPs
If a loaded value is replicated it is best to combine these two operations
into a VLREP (load and replicate), but isel will not produce this if the load
has other users as well.

This patch handles this by making the other users of the load use element 0
of the REPLICATE node instead of the load itself. This way the REPLICATE node
is the only user of the load, and we get a VLREP.

Review: Ulrich Weigand
https://reviews.llvm.org/D54264

llvm-svn: 346746
2018-11-13 08:37:09 +00:00

57 lines
2.2 KiB
LLVM

; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test that a replicate of a loaded value is folded into a vlrep (load and
; replicate) even when the load has other users besides the replicate.
; CHECK-NOT: vrep
; Loads six consecutive doubles from %Vsrc (indices 0..5), combines them with
; the vector argument %T, and returns a single double. Several of the loaded
; values are replicated into both lanes of a <2 x double> and are also used by
; at least one other instruction.
define double @fun(double* %Vsrc, <2 x double> %T) {
entry:
; Scalar loads of %Vsrc[0] through %Vsrc[5].
%Vgep1 = getelementptr double, double* %Vsrc, i64 0
%Vld1 = load double, double* %Vgep1
%Vgep2 = getelementptr double, double* %Vsrc, i64 1
%Vld2 = load double, double* %Vgep2
%Vgep3 = getelementptr double, double* %Vsrc, i64 2
%Vld3 = load double, double* %Vgep3
%Vgep4 = getelementptr double, double* %Vsrc, i64 3
%Vld4 = load double, double* %Vgep4
%Vgep5 = getelementptr double, double* %Vsrc, i64 4
%Vld5 = load double, double* %Vgep5
%Vgep6 = getelementptr double, double* %Vsrc, i64 5
%Vld6 = load double, double* %Vgep6
; %V20 = <%Vld1, %Vld1>: the all-zero shuffle mask selects lane 0 of %V19 for
; both result lanes.
%V19 = insertelement <2 x double> undef, double %Vld1, i32 0
%V20 = shufflevector <2 x double> %V19, <2 x double> undef, <2 x i32> zeroinitializer
; %V22 = <%Vld4, %Vld5>.
%V21 = insertelement <2 x double> undef, double %Vld4, i32 0
%V22 = insertelement <2 x double> %V21, double %Vld5, i32 1
%V23 = fmul <2 x double> %V20, %V22
%V24 = fadd <2 x double> %T, %V23
; %V25 = <%Vld1, %Vld2>: built on top of %V19, so %Vld1 has a user in addition
; to the replicate above.
%V25 = insertelement <2 x double> %V19, double %Vld2, i32 1
; %V27 = <%Vld6, %Vld6>, formed with two insertelements instead of a shuffle.
%V26 = insertelement <2 x double> undef, double %Vld6, i32 0
%V27 = insertelement <2 x double> %V26, double %Vld6, i32 1
%V28 = fmul <2 x double> %V25, %V27
%V29 = fadd <2 x double> %T, %V28
; %V31 = <%Vld2, %Vld2>; %Vld2 is also lane 1 of %V25.
%V30 = insertelement <2 x double> undef, double %Vld2, i32 0
%V31 = shufflevector <2 x double> %V30, <2 x double> undef, <2 x i32> zeroinitializer
; %V33 = <%Vld5, %Vld6>.
%V32 = insertelement <2 x double> undef, double %Vld5, i32 0
%V33 = insertelement <2 x double> %V32, double %Vld6, i32 1
%V34 = fmul <2 x double> %V31, %V33
%V35 = fadd <2 x double> %T, %V34
; %V37 = <%Vld3, %Vld3>.
%V36 = insertelement <2 x double> undef, double %Vld3, i32 0
%V37 = shufflevector <2 x double> %V36, <2 x double> undef, <2 x i32> zeroinitializer
%V38 = fmul <2 x double> %V37, %V22
%V39 = fadd <2 x double> %T, %V38
; Scalar uses of %Vld3 and %Vld6, both of which also feed the vectors above.
%Vmul37 = fmul double %Vld3, %Vld6
%Vadd38 = fadd double %Vmul37, %Vmul37
; Sum the four vector results, take lane 0, and add the scalar term, so every
; computation above contributes to the returned value.
%VA0 = fadd <2 x double> %V24, %V29
%VA1 = fadd <2 x double> %VA0, %V35
%VA2 = fadd <2 x double> %VA1, %V39
%VE0 = extractelement <2 x double> %VA2, i32 0
%VS1 = fadd double %VE0, %Vadd38
ret double %VS1
}