1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 19:12:56 +02:00
Commit Graph

36 Commits

Author SHA1 Message Date
Chris Lattner
81742d7a8f add a note
llvm-svn: 50267
2008-04-25 17:25:00 +00:00
Nate Begeman
5f18794295 readme updates
llvm-svn: 47051
2008-02-13 07:06:12 +00:00
Chris Lattner
65291785c8 Add a note
llvm-svn: 27999
2006-04-28 00:04:05 +00:00
Chris Lattner
e307f43f35 add a note
llvm-svn: 27832
2006-04-19 16:22:38 +00:00
Chris Lattner
62537a04fb add a note
llvm-svn: 27828
2006-04-19 05:55:06 +00:00
Chris Lattner
5f153584d9 add a note
llvm-svn: 27809
2006-04-18 18:30:19 +00:00
Chris Lattner
44ea12c5f8 Implement an important entry from README_ALTIVEC:
If an altivec predicate compare is used immediately by a branch, don't
use a (serializing) MFCR instruction to read the CR6 register, which requires
a compare to get it back to CR's.  Instead, just branch on CR6 directly. :)

For example, for:
void foo2(vector float *A, vector float *B) {
  if (!vec_any_eq(*A, *B))
    *B = (vector float){0,0,0,0};
}

We now generate:

_foo2:
        mfspr r2, 256
        oris r5, r2, 12288
        mtspr 256, r5
        lvx v2, 0, r4
        lvx v3, 0, r3
        vcmpeqfp. v2, v3, v2
        bne cr6, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
        vxor v2, v2, v2
        stvx v2, 0, r4
        mtspr 256, r2
        blr
LBB1_2: ; UnifiedReturnBlock
        mtspr 256, r2
        blr

instead of:

_foo2:
        mfspr r2, 256
        oris r5, r2, 12288
        mtspr 256, r5
        lvx v2, 0, r4
        lvx v3, 0, r3
        vcmpeqfp. v2, v3, v2
        mfcr r3, 2
        rlwinm r3, r3, 27, 31, 31
        cmpwi cr0, r3, 0
        beq cr0, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
        vxor v2, v2, v2
        stvx v2, 0, r4
        mtspr 256, r2
        blr
LBB1_2: ; UnifiedReturnBlock
        mtspr 256, r2
        blr

This implements CodeGen/PowerPC/vec_br_cmp.ll.

llvm-svn: 27804
2006-04-18 17:59:36 +00:00
Chris Lattner
519001b0ee move some stuff around, clean things up
llvm-svn: 27802
2006-04-18 17:52:36 +00:00
Chris Lattner
5951b60cb4 Implement v16i8 multiply with this code:
vmuloub v5, v3, v2
        vmuleub v2, v3, v2
        vperm v2, v2, v5, v4

This implements CodeGen/PowerPC/vec_mul.ll.  With this, v16i8 multiplies are
6.79x faster than before.

Overall, UnitTests/Vector/multiplies.c is now 2.45x faster with LLVM than with
GCC.

Remove the 'integer multiplies' todo from the README file.

llvm-svn: 27792
2006-04-18 03:57:35 +00:00
Chris Lattner
81938fa3db remove done item
llvm-svn: 27778
2006-04-17 21:52:03 +00:00
Chris Lattner
7d66e5a118 add a note
llvm-svn: 27758
2006-04-17 17:29:41 +00:00
Chris Lattner
5367a73dec Implement a TODO: for any shuffle that can be viewed as a v4[if]32 shuffle,
if it can be implemented in 3 or fewer discrete altivec instructions, codegen
it as such.  This implements Regression/CodeGen/PowerPC/vec_perf_shuffle.ll

llvm-svn: 27748
2006-04-17 05:28:54 +00:00
Chris Lattner
d86516991a Implement a TODO: have the legalizer canonicalize a bunch of operations to
one type (v4i32) so that we don't have to write patterns for each type, and
so that more CSE opportunities are exposed.

llvm-svn: 27731
2006-04-16 01:37:57 +00:00
Chris Lattner
f4126f0db7 Make the BUILD_VECTOR lowering code much more aggressive w.r.t constant vectors.
Remove some done items from the todo list.

llvm-svn: 27729
2006-04-16 01:01:29 +00:00
Chris Lattner
cec07adf4d add a note, move an altivec todo to the altivec list.
llvm-svn: 27654
2006-04-13 16:48:00 +00:00
Chris Lattner
e087b8e321 Add a new way to match vector constants, which make it easier to bang bits of
different types.

Codegen spltw(0x7FFFFFFF) and spltw(0x80000000) without a constant pool load,
implementing PowerPC/vec_constants.ll:test1.  This compiles:

typedef float vf __attribute__ ((vector_size (16)));
typedef int vi __attribute__ ((vector_size (16)));
void test(vi *P1, vi *P2, vf *P3) {
  *P1 &= (vi){0x80000000,0x80000000,0x80000000,0x80000000};
  *P2 &= (vi){0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF};
  *P3 = vec_abs((vector float)*P3);
}

to:

_test:
        mfspr r2, 256
        oris r6, r2, 49152
        mtspr 256, r6
        vspltisw v0, -1
        vslw v0, v0, v0
        lvx v1, 0, r3
        vand v1, v1, v0
        stvx v1, 0, r3
        lvx v1, 0, r4
        vandc v1, v1, v0
        stvx v1, 0, r4
        lvx v1, 0, r5
        vandc v0, v1, v0
        stvx v0, 0, r5
        mtspr 256, r2
        blr

instead of (with two constant pool entries):

_test:
        mfspr r2, 256
        oris r6, r2, 49152
        mtspr 256, r6
        li r6, lo16(LCPI1_0)
        lis r7, ha16(LCPI1_0)
        li r8, lo16(LCPI1_1)
        lis r9, ha16(LCPI1_1)
        lvx v0, r7, r6
        lvx v1, 0, r3
        vand v0, v1, v0
        stvx v0, 0, r3
        lvx v0, r9, r8
        lvx v1, 0, r4
        vand v1, v1, v0
        stvx v1, 0, r4
        lvx v1, 0, r5
        vand v0, v1, v0
        stvx v0, 0, r5
        mtspr 256, r2
        blr

GCC produces (with 2 cp entries):

_test:
        mfspr r0,256
        stw r0,-4(r1)
        oris r0,r0,0xc00c
        mtspr 256,r0
        lis r2,ha16(LC0)
        lis r9,ha16(LC1)
        la r2,lo16(LC0)(r2)
        lvx v0,0,r3
        lvx v1,0,r5
        la r9,lo16(LC1)(r9)
        lwz r12,-4(r1)
        lvx v12,0,r2
        lvx v13,0,r9
        vand v0,v0,v12
        stvx v0,0,r3
        vspltisw v0,-1
        vslw v12,v0,v0
        vandc v1,v1,v12
        stvx v1,0,r5
        lvx v0,0,r4
        vand v0,v0,v13
        stvx v0,0,r4
        mtspr 256,r12
        blr

llvm-svn: 27624
2006-04-12 19:07:14 +00:00
Chris Lattner
0e63e916b3 we have a shuffle instr, add an example.
llvm-svn: 27592
2006-04-11 18:47:03 +00:00
Chris Lattner
db7dfe8c61 Add an item
llvm-svn: 27470
2006-04-06 23:16:19 +00:00
Chris Lattner
300076cbd8 Pattern match vmrg* instructions, which are now lowered by the CFE into shuffles.
llvm-svn: 27457
2006-04-06 21:11:54 +00:00
Chris Lattner
6cf87c1b01 remove two done items
llvm-svn: 27453
2006-04-06 19:19:38 +00:00
Chris Lattner
2875bb116e Support pattern matching vsldoi(x,y) and vsldoi(x,x), which allows the f.e. to
lower it and LLVM to have one fewer intrinsic.  This implements
CodeGen/PowerPC/vec_shuffle.ll

llvm-svn: 27450
2006-04-06 18:26:28 +00:00
Chris Lattner
7f13e50435 Add all of the data stream intrinsics and instructions. woo
llvm-svn: 27442
2006-04-05 22:27:14 +00:00
Chris Lattner
993209029f add vmladduhm
llvm-svn: 27423
2006-04-05 00:49:48 +00:00
Chris Lattner
10394b1c42 add a note
llvm-svn: 27419
2006-04-04 23:45:11 +00:00
Chris Lattner
59c4add58a add a note
llvm-svn: 27414
2006-04-04 22:43:55 +00:00
Chris Lattner
8967316b8c Remove done item
llvm-svn: 27351
2006-04-02 05:28:54 +00:00
Chris Lattner
9c24ec6de5 add a note
llvm-svn: 27348
2006-04-02 03:59:11 +00:00
Chris Lattner
d27ced882b add a note
llvm-svn: 27302
2006-03-31 19:00:22 +00:00
Chris Lattner
95d358dbdb Implement an item from the readme, folding vcmp/vcmp. instructions with
identical instructions into a single instruction.  For example, for:

void test(vector float *x, vector float *y, int *P) {
  int v = vec_any_out(*x, *y);
  *x = (vector float)vec_cmpb(*x, *y);
  *P = v;
}

we now generate:

_test:
        mfspr r2, 256
        oris r6, r2, 49152
        mtspr 256, r6
        lvx v0, 0, r4
        lvx v1, 0, r3
        vcmpbfp. v0, v1, v0
        mfcr r4, 2
        stvx v0, 0, r3
        rlwinm r3, r4, 27, 31, 31
        xori r3, r3, 1
        stw r3, 0(r5)
        mtspr 256, r2
        blr

instead of:

_test:
        mfspr r2, 256
        oris r6, r2, 57344
        mtspr 256, r6
        lvx v0, 0, r4
        lvx v1, 0, r3
        vcmpbfp. v2, v1, v0
        mfcr r4, 2
***     vcmpbfp v0, v1, v0
        rlwinm r4, r4, 27, 31, 31
        stvx v0, 0, r3
        xori r3, r4, 1
        stw r3, 0(r5)
        mtspr 256, r2
        blr

Testcase here: CodeGen/PowerPC/vcmp-fold.ll

llvm-svn: 27290
2006-03-31 06:02:07 +00:00
Chris Lattner
a7a7c035b3 These are done
llvm-svn: 27284
2006-03-31 04:53:21 +00:00
Chris Lattner
1a773f8f18 add a note
llvm-svn: 27243
2006-03-29 00:24:13 +00:00
Chris Lattner
93559450b8 add a note
llvm-svn: 27227
2006-03-28 18:56:23 +00:00
Nate Begeman
5a82c8ccbd Add a few more altivec intrinsics
llvm-svn: 27215
2006-03-28 04:15:58 +00:00
Chris Lattner
a570305421 implement a bunch more intrinsics.
llvm-svn: 27209
2006-03-28 02:29:37 +00:00
Chris Lattner
dab8425129 Add a bunch of notes from my journey thus far.
llvm-svn: 27170
2006-03-27 07:41:00 +00:00
Chris Lattner
f1d6a9483f Split out altivec notes into their own README
llvm-svn: 27168
2006-03-27 07:04:16 +00:00