1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 19:12:56 +02:00
Go to file
Chris Lattner 513ba43053 add a new SimplifyDemandedVectorElts method, which works similarly to
SimplifyDemandedBits.  The idea is that some operations can be simplified if
not all of the computed elements are needed.  Some targets (like x86) have a
large number of intrinsics that operate on a single element, but pass other
elts through unmodified.  If those other elements are not needed, the
intrinsics can be simplified to scalar operations, and insertelement ops can
be removed.

This turns (f.e.):

ushort %Convert_sse(float %f) {
        %tmp = insertelement <4 x float> undef, float %f, uint 0                ; <<4 x float>> [#uses=1]
        %tmp10 = insertelement <4 x float> %tmp, float 0.000000e+00, uint 1             ; <<4 x float>> [#uses=1]
        %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, uint 2           ; <<4 x float>> [#uses=1]
        %tmp12 = insertelement <4 x float> %tmp11, float 0.000000e+00, uint 3           ; <<4 x float>> [#uses=1]
        %tmp28 = tail call <4 x float> %llvm.x86.sse.sub.ss( <4 x float> %tmp12, <4 x float> < float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )               ; <<4 x float>> [#uses=1]
        %tmp37 = tail call <4 x float> %llvm.x86.sse.mul.ss( <4 x float> %tmp28, <4 x float> < float 5.000000e-01, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )               ; <<4 x float>> [#uses=1]
        %tmp48 = tail call <4 x float> %llvm.x86.sse.min.ss( <4 x float> %tmp37, <4 x float> < float 6.553500e+04, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )               ; <<4 x float>> [#uses=1]
        %tmp59 = tail call <4 x float> %llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> zeroinitializer )          ; <<4 x float>> [#uses=1]
        %tmp = tail call int %llvm.x86.sse.cvttss2si( <4 x float> %tmp59 )              ; <int> [#uses=1]
        %tmp69 = cast int %tmp to ushort                ; <ushort> [#uses=1]
        ret ushort %tmp69
}

into:

ushort %Convert_sse(float %f) {
entry:
        %tmp28 = sub float %f, 1.000000e+00             ; <float> [#uses=1]
        %tmp37 = mul float %tmp28, 5.000000e-01         ; <float> [#uses=1]
        %tmp375 = insertelement <4 x float> undef, float %tmp37, uint 0         ; <<4 x float>> [#uses=1]
        %tmp48 = tail call <4 x float> %llvm.x86.sse.min.ss( <4 x float> %tmp375, <4 x float> < float 6.553500e+04, float undef, float undef, float undef > )           ; <<4 x float>> [#uses=1]
        %tmp59 = tail call <4 x float> %llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> < float 0.000000e+00, float undef, float undef, float undef > )            ; <<4 x float>> [#uses=1]
        %tmp = tail call int %llvm.x86.sse.cvttss2si( <4 x float> %tmp59 )              ; <int> [#uses=1]
        %tmp69 = cast int %tmp to ushort                ; <ushort> [#uses=1]
        ret ushort %tmp69
}

which improves codegen from:

_Convert_sse:
        movss LCPI1_0, %xmm0
        movss 4(%esp), %xmm1
        subss %xmm0, %xmm1
        movss LCPI1_1, %xmm0
        mulss %xmm0, %xmm1
        movss LCPI1_2, %xmm0
        minss %xmm0, %xmm1
        xorps %xmm0, %xmm0
        maxss %xmm0, %xmm1
        cvttss2si %xmm1, %eax
        andl $65535, %eax
        ret

to:

_Convert_sse:
        movss 4(%esp), %xmm0
        subss LCPI1_0, %xmm0
        mulss LCPI1_1, %xmm0
        movss LCPI1_2, %xmm1
        minss %xmm1, %xmm0
        xorps %xmm1, %xmm1
        maxss %xmm1, %xmm0
        cvttss2si %xmm0, %eax
        andl $65535, %eax
        ret


This is just a first step, it can be extended in many ways.  Testcase here:
Transforms/InstCombine/vec_demanded_elts.ll

llvm-svn: 30752
2006-10-05 06:55:50 +00:00
autoconf Fix misleading warning message 2006-10-02 17:23:42 +00:00
docs Update documentaion of node annotation (coloring) in viewGraph. 2006-10-02 12:28:07 +00:00
examples Use llvm-config to determine what to link in 2006-09-04 06:04:03 +00:00
include/llvm Add insertelement/extractelement helper ctors. 2006-10-05 06:24:58 +00:00
lib add a new SimplifyDemandedVectorElts method, which works similarly to 2006-10-05 06:55:50 +00:00
projects Don't build the runtime library if LLVMGCC is not configured. 2006-10-02 19:10:56 +00:00
runtime Describe and date modifications we made per LGPL requirements. 2006-08-08 14:47:54 +00:00
test new testcase 2006-10-05 06:51:54 +00:00
tools simplify code 2006-09-28 23:24:48 +00:00
utils Fix more static dtor issues 2006-10-04 21:52:35 +00:00
win32 Keep Visual Studio happy. 2006-05-03 00:28:50 +00:00
Xcode Adding new files. 2006-09-07 22:07:57 +00:00
.cvsignore Ignore stuff built by "make dist". 2006-04-07 15:55:18 +00:00
configure fix misleading warning 2006-10-02 17:24:55 +00:00
CREDITS.TXT The list is sorted by name. 2006-08-29 01:42:47 +00:00
LICENSE.TXT burg is gone 2006-09-11 17:28:11 +00:00
llvm.spec.in Minor technical correction in documentation. 2006-08-21 01:58:57 +00:00
Makefile Document build order dependencies. Make sure that llvm-config is built before 2006-09-04 04:27:07 +00:00
Makefile.common Update comments to reflect new variable names. Patch contributed by 2005-02-14 16:02:19 +00:00
Makefile.config.in Make the name of the project consistent with that specified in the 2006-08-07 23:23:39 +00:00
Makefile.rules Move CompileCommonOpts to the end of the list so that EXTRA_OPTIONS=-O0 will 2006-09-29 18:47:13 +00:00
README.txt Make the text of this file a little more useful. 2004-09-02 22:49:27 +00:00

Low Level Virtual Machine (LLVM)
================================

This directory and its subdirectories contain source code for the Low Level 
Virtual Machine, a toolkit for the construction of highly optimized compilers,
optimizers, and runtime environments. 

LLVM is open source software. You may freely distribute it under the terms of
the license agreement found in LICENSE.txt.

Please see the HTML documentation provided in docs/index.html for further
assistance with LLVM.