1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-21 03:53:04 +02:00
Commit Graph

138 Commits

Author SHA1 Message Date
Chris Lattner
104db817c8 Constant fold all of the vector binops. This allows us to compile this:
"vector unsigned char mergeLowHigh = (vector unsigned char)
( 8, 9, 10, 11, 16, 17, 18, 19, 12, 13, 14, 15, 20, 21, 22, 23 );
vector unsigned char mergeHighLow = vec_xor( mergeLowHigh, vec_splat_u8(8));"

aka:

void %test2(<16 x sbyte>* %P) {
  store <16 x sbyte> cast (<4 x int> xor (<4 x int> cast (<16 x ubyte> < ubyte 8, ubyte 9, ubyte 10, ubyte 11, ubyte 16, ubyte 17, ubyte 18, ubyte 19, ubyte 12, ubyte 13, ubyte 14, ubyte 15, ubyte 20, ubyte 21, ubyte 22, ubyte 23 > to <4 x int>), <4 x int> cast (<16 x sbyte> < sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8 > to <4 x int>)) to <16 x sbyte>), <16 x sbyte> * %P
  ret void
}

into this:

_test2:
        mfspr r2, 256
        oris r4, r2, 32768
        mtspr 256, r4
        li r4, lo16(LCPI2_0)
        lis r5, ha16(LCPI2_0)
        lvx v0, r5, r4
        stvx v0, 0, r3
        mtspr 256, r2
        blr

instead of this:

_test2:
        mfspr r2, 256
        oris r4, r2, 49152
        mtspr 256, r4
        li r4, lo16(LCPI2_0)
        lis r5, ha16(LCPI2_0)
        vspltisb v0, 8
        lvx v1, r5, r4
        vxor v0, v1, v0
        stvx v0, 0, r3
        mtspr 256, r2
        blr

... which occurs here:
http://developer.apple.com/hardware/ve/calcspeed.html

llvm-svn: 27343
2006-04-02 03:25:57 +00:00
Chris Lattner
52732a272f Implement constant folding of bit_convert of arbitrary constant vbuild_vector nodes.
llvm-svn: 27341
2006-04-02 02:53:43 +00:00
Chris Lattner
b088cfc01a Delete identity shuffles, implementing CodeGen/Generic/vector-identity-shuffle.ll
llvm-svn: 27317
2006-03-31 22:16:43 +00:00
Chris Lattner
0e7da656a7 Remove dead *extloads. This allows us to codegen vector.ll:test_extract_elt
to:

test_extract_elt:
        alloc r3 = ar.pfs,0,1,0,0
        adds r8 = 12, r32
        ;;
        ldfs f8 = [r8]
        mov ar.pfs = r3
        br.ret.sptk.many rp

instead of:

test_extract_elt:
        alloc r3 = ar.pfs,0,1,0,0
        adds r8 = 28, r32
        adds r9 = 24, r32
        adds r10 = 20, r32
        adds r11 = 16, r32
        ;;
        ldfs f6 = [r8]
        ;;
        ldfs f6 = [r9]
        adds r8 = 12, r32
        adds r9 = 8, r32
        adds r14 = 4, r32
        ;;
        ldfs f6 = [r10]
        ;;
        ldfs f6 = [r11]
        ldfs f8 = [r8]
        ;;
        ldfs f6 = [r9]
        ;;
        ldfs f6 = [r14]
        ;;
        ldfs f6 = [r32]
        mov ar.pfs = r3
        br.ret.sptk.many rp

llvm-svn: 27297
2006-03-31 18:10:41 +00:00
Chris Lattner
c3be332547 Delete dead loads in the dag. This allows us to compile
vector.ll:test_extract_elt2 into:

_test_extract_elt2:
        lfd f1, 32(r3)
        blr

instead of:

_test_extract_elt2:
        lfd f0, 56(r3)
        lfd f0, 48(r3)
        lfd f0, 40(r3)
        lfd f1, 32(r3)
        lfd f0, 24(r3)
        lfd f0, 16(r3)
        lfd f0, 8(r3)
        lfd f0, 0(r3)
        blr

llvm-svn: 27296
2006-03-31 18:06:18 +00:00
Chris Lattner
95a8c4fb11 When building a VVECTOR_SHUFFLE node from extract_element operations, make
sure to build it as SHUFFLE(X, undef, mask), not SHUFFLE(X, X, mask).

The later is not canonical form, and prevents the PPC splat pattern from
matching.  For a particular splat, we go from generating this:

	li r10, lo16(LCPI1_0)
	lis r11, ha16(LCPI1_0)
	lvx v3, r11, r10
	vperm v3, v2, v2, v3

to generating:

	vspltw v3, v2, 3

llvm-svn: 27236
2006-03-28 22:19:47 +00:00
Chris Lattner
017e8f1798 Canonicalize VECTOR_SHUFFLE(X, X, Y) -> VECTOR_SHUFFLE(X,undef,Y')
llvm-svn: 27235
2006-03-28 22:11:53 +00:00
Chris Lattner
a623f6f696 Turn a series of extract_element's feeding a build_vector into a
vector_shuffle node.  For this:

void test(__m128 *res, __m128 *A, __m128 *B) {
  *res = _mm_unpacklo_ps(*A, *B);
}

we now produce this code:

_test:
        movl 8(%esp), %eax
        movaps (%eax), %xmm0
        movl 12(%esp), %eax
        unpcklps (%eax), %xmm0
        movl 4(%esp), %eax
        movaps %xmm0, (%eax)
        ret

instead of this:

_test:
        subl $76, %esp
        movl 88(%esp), %eax
        movaps (%eax), %xmm0
        movaps %xmm0, (%esp)
        movaps %xmm0, 32(%esp)
        movss 4(%esp), %xmm0
        movss 32(%esp), %xmm1
        unpcklps %xmm0, %xmm1
        movl 84(%esp), %eax
        movaps (%eax), %xmm0
        movaps %xmm0, 16(%esp)
        movaps %xmm0, 48(%esp)
        movss 20(%esp), %xmm0
        movss 48(%esp), %xmm2
        unpcklps %xmm0, %xmm2
        unpcklps %xmm1, %xmm2
        movl 80(%esp), %eax
        movaps %xmm2, (%eax)
        addl $76, %esp
        ret

GCC produces this (with -fomit-frame-pointer):

_test:
        subl    $12, %esp
        movl    20(%esp), %eax
        movaps  (%eax), %xmm0
        movl    24(%esp), %eax
        unpcklps        (%eax), %xmm0
        movl    16(%esp), %eax
        movaps  %xmm0, (%eax)
        addl    $12, %esp
        ret

llvm-svn: 27233
2006-03-28 20:28:38 +00:00
Chris Lattner
cad173698d Don't crash on X^X if X is a vector. Instead, produce a vector of zeros.
llvm-svn: 27229
2006-03-28 19:11:05 +00:00
Chris Lattner
62185c0496 Don't call SimplifyDemandedBits on vectors
llvm-svn: 27128
2006-03-25 22:19:00 +00:00
Chris Lattner
c9c081fc40 fold insertelement(buildvector) -> buildvector if the inserted element # is
a constant.  This implements test_constant_insert in CodeGen/Generic/vector.ll

llvm-svn: 26851
2006-03-19 01:27:56 +00:00
Nate Begeman
42736d46b2 Remove BRTWOWAY*
Make the PPC backend not dependent on BRTWOWAY_CC and make the branch
selector smarter about the code it generates, fixing a case in the
readme.

llvm-svn: 26814
2006-03-17 01:40:33 +00:00
Chris Lattner
3b4ceba4ff make sure dead token factor nodes are removed by the dag combiner.
llvm-svn: 26731
2006-03-13 18:37:30 +00:00
Chris Lattner
3a3c8682b5 Fold X+Y -> X|Y when safe. This implements:
Regression/CodeGen/PowerPC/and_add.ll

a case that occurs with dynamic allocas of constant size.

llvm-svn: 26727
2006-03-13 06:51:27 +00:00
Chris Lattner
9d0ebb55a6 add a couple of missing folds
llvm-svn: 26724
2006-03-13 06:26:26 +00:00
Chris Lattner
f2bed5f46e Reinstate this now that the offending opposite xform has been removed.
llvm-svn: 26548
2006-03-05 19:53:55 +00:00
Evan Cheng
e0a6cf78f8 Back out fold (shl (add x, c1), c2) -> (add (shl x, c2), c1<<c2) for now.
It's causing an infinite loop compiling ldecod on x86 / Darwin.

llvm-svn: 26544
2006-03-05 07:30:16 +00:00
Chris Lattner
827e3f62b0 Add some simple copysign folds
llvm-svn: 26543
2006-03-05 05:30:57 +00:00
Chris Lattner
cd57ff4fb6 fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
fold (shl (add x, c1), c2) -> (add (shl x, c2), c1<<c2)

This allows us to compile CodeGen/PowerPC/addi-reassoc.ll into:

_test1:
        slwi r2, r4, 4
        add r2, r2, r3
        lwz r3, 36(r2)
        blr
_test2:
        mulli r2, r4, 5
        add r2, r2, r3
        lbz r2, 11(r2)
        extsb r3, r2
        blr

instead of:

_test1:
        addi r2, r4, 2
        slwi r2, r2, 4
        add r2, r3, r2
        lwz r3, 4(r2)
        blr
_test2:
        addi r2, r4, 2
        mulli r2, r2, 5
        add r2, r3, r2
        lbz r2, 1(r2)
        extsb r3, r2
        blr

llvm-svn: 26535
2006-03-04 23:33:26 +00:00
Chris Lattner
d45d3b68b0 Fix CodeGen/Generic/2006-03-01-dagcombineinfloop.ll, an infinite loop
in the dag combiner on 176.gcc on x86.

llvm-svn: 26459
2006-03-01 21:47:21 +00:00
Chris Lattner
5544c94289 Fix a typo evan noticed
llvm-svn: 26454
2006-03-01 19:55:35 +00:00
Chris Lattner
6434a21c21 Add support for target-specific dag combines
llvm-svn: 26443
2006-03-01 04:53:38 +00:00
Chris Lattner
f571b8c5dd Add a new AddToWorkList method, start using it
llvm-svn: 26441
2006-03-01 04:03:14 +00:00
Chris Lattner
7ba1c5e025 Pull shifts by a constant through multiplies (a form of reassociation),
implementing Regression/CodeGen/X86/mul-shift-reassoc.ll

llvm-svn: 26440
2006-03-01 03:44:24 +00:00
Evan Cheng
d8b92e04a2 Vector ops lowering.
llvm-svn: 26436
2006-03-01 01:09:54 +00:00
Chris Lattner
3aa1cd759e Compile:
unsigned foo4(unsigned short *P) { return *P & 255; }
unsigned foo5(short *P) { return *P & 255; }

to:

_foo4:
        lbz r3,1(r3)
        blr
_foo5:
        lbz r3,1(r3)
        blr

not:

_foo4:
        lhz r2, 0(r3)
        rlwinm r3, r2, 0, 24, 31
        blr
_foo5:
        lhz r2, 0(r3)
        rlwinm r3, r2, 0, 24, 31
        blr

llvm-svn: 26419
2006-02-28 06:49:37 +00:00
Chris Lattner
24a22066c4 Fold "and (LOAD P), 255" -> zextload. This allows us to compile:
unsigned foo3(unsigned *P) { return *P & 255; }
as:
_foo3:
        lbz r3, 3(r3)
        blr

instead of:

_foo3:
        lwz r2, 0(r3)
        rlwinm r3, r2, 0, 24, 31
        blr

and:

unsigned short foo2(float a) { return a; }

as:
_foo2:
        fctiwz f0, f1
        stfd f0, -8(r1)
        lhz r3, -2(r1)
        blr

instead of:

_foo2:
        fctiwz f0, f1
        stfd f0, -8(r1)
        lwz r2, -4(r1)
        rlwinm r3, r2, 0, 16, 31
        blr

llvm-svn: 26417
2006-02-28 06:35:35 +00:00
Chris Lattner
152201257d fold (sra (sra x, c1), c2) -> (sra x, c1+c2)
llvm-svn: 26416
2006-02-28 06:23:04 +00:00
Chris Lattner
baea29f33d remove some completed notes
llvm-svn: 26390
2006-02-27 00:39:31 +00:00
Chris Lattner
4b5130d957 Fix a problem Nate and Duraid reported where simplifying nodes can cause
them to get ressurected, in which case, deleting the undead nodes is
unfriendly.

llvm-svn: 26291
2006-02-20 06:51:04 +00:00
Nate Begeman
bbcae2bf3d Add checks to make sure we don't create bogus extend nodes, and fix a bug
where we were doing exactly that which was causing failures on x86 and
alpha.

llvm-svn: 26284
2006-02-18 02:40:58 +00:00
Chris Lattner
bd36f029ef Fix a tricky issue in the SimplifyDemandedBits code where CombineTo wasn't
exactly the API we wanted to call into.  This fixes the crash on crafty last
night.

llvm-svn: 26269
2006-02-17 21:58:01 +00:00
Nate Begeman
7ebd8fd80d Clean up DemandedBitsAreZero interface
Make more use of the new mask helpers in valuetypes.h
Combine (sra (srl x, c1), c1) -> sext_inreg if legal

llvm-svn: 26263
2006-02-17 19:54:08 +00:00
Nate Begeman
a3a0a0340f Don't expand sdiv by power of two before legalize, since it will likely
generate illegal nodes.

llvm-svn: 26261
2006-02-17 07:26:20 +00:00
Nate Begeman
9c0ab71f4a kill ADD_PARTS & SUB_PARTS and replace them with fancy new ADDC, ADDE, SUBC
and SUBE nodes that actually expose what's going on and allow for
significant simplifications in the targets.

llvm-svn: 26255
2006-02-17 05:43:56 +00:00
Nate Begeman
0bc71999b9 Rework the SelectionDAG-based implementations of SimplifyDemandedBits
and ComputeMaskedBits to match the new improved versions in instcombine.
Tested against all of multisource/benchmarks on ppc.

llvm-svn: 26238
2006-02-16 21:11:51 +00:00
Chris Lattner
6f407a54c1 Lowering of sdiv X, pow2 was broken, this fixes it. This patch is written
by Nate, I'm just committing it for him.

llvm-svn: 26230
2006-02-16 08:02:36 +00:00
Jim Laskey
e51152ac31 Should not combine ISD::LOCATIONs until we have scheme to remove from
MachineDebugInfo tables.

llvm-svn: 26216
2006-02-15 19:34:44 +00:00
Chris Lattner
50f604c041 Compile this:
xori r6, r2, 1
        rlwinm r6, r6, 0, 31, 31
        cmpwi cr0, r6, 0
        bne cr0, LBB1_3 ; endif

to this:

        rlwinm r6, r2, 0, 31, 31
        cmpwi cr0, r6, 0
        beq cr0, LBB1_3 ; endif

llvm-svn: 26047
2006-02-08 02:13:15 +00:00
Nate Begeman
2fe9dfedf0 Back out previous commit, it isn't safe.
llvm-svn: 26006
2006-02-05 08:23:00 +00:00
Nate Begeman
c0e2bb488a fold c1 << (x + c2) into (c1 << c2) << x. fix a warning.
llvm-svn: 26005
2006-02-05 08:07:24 +00:00
Nate Begeman
c93c3d11d6 Handle urem by shifted powers of 2.
llvm-svn: 26001
2006-02-05 07:36:48 +00:00
Nate Begeman
0291708a49 handle combining A / (B << N) into A >>u (log2(B)+N) when B is a power of 2
llvm-svn: 26000
2006-02-05 07:20:23 +00:00
Nate Begeman
2d9838ec9b Add a framework for eliminating instructions that produces undemanded bits.
llvm-svn: 25945
2006-02-03 22:24:05 +00:00
Nate Begeman
78c9e14249 Add common code for reassociating ops in the dag combiner
llvm-svn: 25934
2006-02-03 06:46:56 +00:00
Chris Lattner
456711ae45 Turn any_extend nodes into zero_extend nodes when it allows us to remove an
and instruction.  This allows us to compile stuff like this:

bool %X(int %X) {
        %Y = add int %X, 14
        %Z = setne int %Y, 12345
        ret bool %Z
}

to this:

_X:
        cmpl $12331, 4(%esp)
        setne %al
        movzbl %al, %eax
        ret

instead of this:

_X:
        cmpl $12331, 4(%esp)
        setne %al
        movzbl %al, %eax
        andl $1, %eax
        ret

This occurs quite a bit with the X86 backend.  For example, 25 times in
lambda, 30 times in 177.mesa, 14 times in galgel,  70 times in fma3d,
25 times in vpr, several hundred times in gcc, ~45 times in crafty,
~60 times in parser, ~140 times in eon, 110 times in perlbmk, 55 on gap,
16 times on bzip2, 14 times on twolf, and 1-2 times in many other SPEC2K
programs.

llvm-svn: 25901
2006-02-02 07:17:31 +00:00
Chris Lattner
8f4d73f3da add two dag combines:
(C1-X) == C2 --> X == C1-C2
(X+C1) == C2 --> X == C2-C1

This allows us to compile this:

bool %X(int %X) {
        %Y = add int %X, 14
        %Z = setne int %Y, 12345
        ret bool %Z
}

into this:

_X:
        cmpl $12331, 4(%esp)
        setne %al
        movzbl %al, %eax
        andl $1, %eax
        ret

not this:

_X:
        movl $14, %eax
        addl 4(%esp), %eax
        cmpl $12345, %eax
        setne %al
        movzbl %al, %eax
        andl $1, %eax
        ret

Testcase here: Regression/CodeGen/X86/compare-add.ll

nukage of the and coming up next.

llvm-svn: 25898
2006-02-02 06:36:13 +00:00
Nate Begeman
0be60963bd Fix some of the stuff in the PPC README file, and clean up legalization
of the SELECT_CC, BR_CC, and BRTWOWAY_CC nodes.

llvm-svn: 25875
2006-02-01 07:19:44 +00:00
Chris Lattner
a44182300b Move MaskedValueIsZero from the DAGCombiner to the TargetLowering interface,making isMaskedValueZeroForTargetNode simpler, and useable from other partsof the compiler.
llvm-svn: 25803
2006-01-30 04:09:27 +00:00
Chris Lattner
26589d32e6 pass the address of MaskedValueIsZero into isMaskedValueZeroForTargetNode,
to permit recursion

llvm-svn: 25799
2006-01-30 03:49:37 +00:00