1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-28 14:32:51 +01:00
Commit Graph

596 Commits

Author SHA1 Message Date
Duncan Sands
b3b1ae18ab Crashes llc when using Chris's new legalization logic.
llvm-svn: 45758
2008-01-08 21:51:53 +00:00
Chris Lattner
aeab9aefb3 remove darwin/i386 t-t
llvm-svn: 45743
2008-01-08 06:52:51 +00:00
Chris Lattner
cafc567fb7 Finally implement correct ordered comparisons for PPC, even though
the code generated is not wonderful.  This turns a miscompilation into
a code quality bug (noted in the ppc readme).  This fixes PR642, which
is over 2 years old (!).  Nate, please review this.

llvm-svn: 45742
2008-01-08 06:46:30 +00:00
Nate Begeman
98dba4b0ce Update test to catch recent x86 insert regression and improvements
llvm-svn: 45705
2008-01-07 17:49:23 +00:00
Gordon Henriksen
edbfece273 Setting GlobalDirective in TargetAsmInfo by default rather than
providing a misleading facility. It's used once in the MIPS backend
and hardcoded as "\t.globl\t" everywhere else.

llvm-svn: 45676
2008-01-07 02:31:11 +00:00
Gordon Henriksen
db4f51e1b9 With this patch, the LowerGC transformation becomes the
ShadowStackCollector, which additionally has reduced overhead with
no sacrifice in portability.

Considering a function @fun with 8 loop-local roots,
ShadowStackCollector introduces the following overhead
(x86):

; shadowstack prologue
        movl    L_llvm_gc_root_chain$non_lazy_ptr, %eax
        movl    (%eax), %ecx
        movl    $___gc_fun, 20(%esp)
        movl    $0, 24(%esp)
        movl    $0, 28(%esp)
        movl    $0, 32(%esp)
        movl    $0, 36(%esp)
        movl    $0, 40(%esp)
        movl    $0, 44(%esp)
        movl    $0, 48(%esp)
        movl    $0, 52(%esp)
        movl    %ecx, 16(%esp)
        leal    16(%esp), %ecx
        movl    %ecx, (%eax)

; shadowstack loop overhead
        (none)

; shadowstack epilogue
        movl    48(%esp), %edx
        movl    %edx, (%ecx)

; shadowstack metadata
        .align  3
___gc_fun:                              # __gc_fun
        .long   8
        .space  4

In comparison to LowerGC:

; lowergc prologue
        movl    L_llvm_gc_root_chain$non_lazy_ptr, %eax
        movl    (%eax), %ecx
        movl    %ecx, 48(%esp)
        movl    $8, 52(%esp)
        movl    $0, 60(%esp)
        movl    $0, 56(%esp)
        movl    $0, 68(%esp)
        movl    $0, 64(%esp)
        movl    $0, 76(%esp)
        movl    $0, 72(%esp)
        movl    $0, 84(%esp)
        movl    $0, 80(%esp)
        movl    $0, 92(%esp)
        movl    $0, 88(%esp)
        movl    $0, 100(%esp)
        movl    $0, 96(%esp)
        movl    $0, 108(%esp)
        movl    $0, 104(%esp)
        movl    $0, 116(%esp)
        movl    $0, 112(%esp)

; lowergc loop overhead
        leal    44(%esp), %eax
        movl    %eax, 56(%esp)
        leal    40(%esp), %eax
        movl    %eax, 64(%esp)
        leal    36(%esp), %eax
        movl    %eax, 72(%esp)
        leal    32(%esp), %eax
        movl    %eax, 80(%esp)
        leal    28(%esp), %eax
        movl    %eax, 88(%esp)
        leal    24(%esp), %eax
        movl    %eax, 96(%esp)
        leal    20(%esp), %eax
        movl    %eax, 104(%esp)
        leal    16(%esp), %eax
        movl    %eax, 112(%esp)

; lowergc epilogue
        movl    48(%esp), %edx
        movl    %edx, (%ecx)

; lowergc metadata
        (none)

llvm-svn: 45670
2008-01-07 01:30:53 +00:00
Chris Lattner
7d567adef9 fix this to use a valid triple.
llvm-svn: 45509
2008-01-02 22:21:45 +00:00
Chris Lattner
fbd8cc03c8 verify that aligned common support doesn't break.
llvm-svn: 45495
2008-01-02 19:48:24 +00:00
Duncan Sands
8a4882564a Fix PR1833 - eh.exception and eh.selector return two
values, which means doing extra legalization work.
It would be easier to get this kind of thing right if
there was some documentation...

llvm-svn: 45472
2007-12-31 18:35:50 +00:00
Chris Lattner
d55e743cfe One readme entry is done, one is really easy (Evan, want to investigate
eliminating the llvm.x86.sse2.loadl.pd intrinsic?), one shuffle optzn
may be done (if shufps is better than pinsw, Evan, please review), and
we already know about LICM of simple instructions.

llvm-svn: 45407
2007-12-29 19:31:47 +00:00
Chris Lattner
ed55329cc9 upgrade this test
llvm-svn: 45406
2007-12-29 19:24:06 +00:00
Chris Lattner
cd147e5596 Fold comparisons against a constant nan, and optimize ORD/UNORD
comparisons with a constant.  This allows us to compile isnan to:

_foo:
	fcmpu cr7, f1, f1
	mfcr r2
	rlwinm r3, r2, 0, 31, 31
	blr 

instead of:

LCPI1_0:					;  float
	.space	4
_foo:
	lis r2, ha16(LCPI1_0)
	lfs f0, lo16(LCPI1_0)(r2)
	fcmpu cr7, f1, f0
	mfcr r2
	rlwinm r3, r2, 0, 31, 31
	blr 

llvm-svn: 45405
2007-12-29 08:37:08 +00:00
Chris Lattner
b36a4a7a84 this xform is implemented.
llvm-svn: 45404
2007-12-29 08:19:39 +00:00
Chris Lattner
f8e408b7b1 Codegen:
as:

_bar:
	pushl	%esi
	subl	$8, %esp
	movl	16(%esp), %esi
	call	L_foo$stub
	fstps	(%esi)
	addl	$8, %esp
	popl	%esi
	#FP_REG_KILL
	ret

instead of:

_bar:
	pushl	%esi
	subl	$8, %esp
	movl	16(%esp), %esi
	call	L_foo$stub
	fstpl	(%esi)
	cvtsd2ss	(%esi), %xmm0
	movss	%xmm0, (%esi)
	addl	$8, %esp
	popl	%esi
	#FP_REG_KILL
	ret

llvm-svn: 45401
2007-12-29 06:57:38 +00:00
Chris Lattner
e3515220d2 avoid going through a stack slot to convert from fpstack to xmm reg
if we are just going to store it back anyway.  This improves things 
like:
double foo();
void bar(double *P) { *P = foo(); }

llvm-svn: 45399
2007-12-29 06:41:28 +00:00
Chris Lattner
a432f12b76 one fewer uncond branch with my codegenprepare hack for single-mbb backedges.
llvm-svn: 45360
2007-12-26 17:23:47 +00:00
Gordon Henriksen
e8226d70a9 Tests for changes made in r45356, where IPO optimizations would drop
collector algorithms.

llvm-svn: 45357
2007-12-26 02:47:37 +00:00
Gordon Henriksen
c0a3899bbf GC poses hazards to the inliner. Consider:
define void @f() {
            ...
            call i32 @g()
            ...
    }

    define void @g() {
            ...
    }

The hazards are:

  - @f and @g have GC, but they differ GC. Inlining is invalid. This
    may never occur.
  - @f has no GC, but @g does. g's GC must be propagated to @f.

The other scenarios are safe:

  - @f and @g have the same GC.
  - @f and @g have no GC.
  - @g has no GC.

This patch adds inliner checks for the former two scenarios.

llvm-svn: 45351
2007-12-25 03:10:07 +00:00
Gordon Henriksen
a9f4ed4070 Noting and enforcing that GC intrinsics are valid only within a
function with GC.

This will catch the error when the inliner inlines a function with
GC into a caller with no GC.

llvm-svn: 45350
2007-12-25 02:31:26 +00:00
Gordon Henriksen
44841db057 Adjusting verification of "llvm.gc*" intrinsic prototypes to match
LangRef.

llvm-svn: 45349
2007-12-25 02:02:10 +00:00
Evan Cheng
18c39c03a7 Remove xfail. This is fixed.
llvm-svn: 45254
2007-12-20 02:25:21 +00:00
Scott Michel
5cbdbd26a8 More working CellSPU tests:
- vec_const.ll: Vector constant loads
- immed64.ll: i64, f64 constant loads

llvm-svn: 45242
2007-12-20 00:44:13 +00:00
Scott Michel
83ac96e27d CellSPU testcase, extract_elt.ll: extract vector element.
llvm-svn: 45219
2007-12-19 21:17:42 +00:00
Scott Michel
686bbd9b19 More working CellSPU test cases:
- call.ll: Function call
- ctpop.ll: Count population
- dp_farith.ll: DP arithmetic
- eqv.ll: Equivalence primitives
- fcmp.ll: SP comparisons
- fdiv.ll: SP division
- fneg-fabs.ll: SP negation, aboslute value
- int2fp.ll: Integer -> SP conversion
- rotate_ops.ll: Rotation primitives
- select_bits.ll: (a & c) | (b & ~c) bit selection
- shift_ops.ll: Shift primitives
- sp_farith.ll: SP arithmentic

llvm-svn: 45217
2007-12-19 20:50:49 +00:00
Scott Michel
6cb9f6d20c Two more test cases: or_ops.ll (arithmetic or operations) and vecinsert.ll
(vector insertions)

llvm-svn: 45216
2007-12-19 20:15:47 +00:00
Scott Michel
d4d96bb6f6 Add new immed16.ll test case, fix CellSPU errata to make test case work.
llvm-svn: 45196
2007-12-19 07:35:06 +00:00
Evan Cheng
8824950e8f Fix PR1872: SrcValue and SrcValueOffset should not be used to compute load / store node id.
llvm-svn: 45167
2007-12-18 19:38:14 +00:00
Evan Cheng
36bfae49e3 FIX for PR1799: When a load is unfolded from an instruction, check if it is a new node. If not, do not create a new SUnit.
llvm-svn: 45157
2007-12-18 08:42:10 +00:00
Scott Michel
94d7a2f1f2 i32 immediate constant test case for CellSPU
llvm-svn: 45134
2007-12-17 23:45:52 +00:00
Scott Michel
4f980e1acd - Restore some i8 functionality in CellSPU
- New test case: nand.ll

llvm-svn: 45130
2007-12-17 22:32:34 +00:00
Duncan Sands
3a0d757bd5 Make invokes of inline asm legal. Teach codegen
how to lower them (with no attempt made to be
efficient, since they should only occur for
unoptimized code).

llvm-svn: 45108
2007-12-17 18:08:19 +00:00
Evan Cheng
1d95b669b6 Make better use of instructions that clear high bits; fix various 2-wide shuffle bugs.
llvm-svn: 45058
2007-12-15 03:00:47 +00:00
Scott Michel
307f334014 Start committing working test cases for CellSPU.
llvm-svn: 45050
2007-12-15 00:38:50 +00:00
Evan Cheng
6909ff8c4b Fix ctlz and cttz. llvm definition requires them to return number of bits in of the src type when value is zero.
llvm-svn: 45029
2007-12-14 08:30:15 +00:00
Evan Cheng
51cf86ded0 Implement ctlz and cttz with bsr and bsf.
llvm-svn: 45024
2007-12-14 02:13:44 +00:00
Evan Cheng
a152909956 Be extra careful with extension use optimation. Now turned on by default.
llvm-svn: 44981
2007-12-13 03:32:53 +00:00
Evan Cheng
343929c773 Fold some and + shift in x86 addressing mode.
llvm-svn: 44970
2007-12-13 00:43:27 +00:00
Evan Cheng
64a1febf9a Implicit def instructions, e.g. X86::IMPLICIT_DEF_GR32, are always re-materializable and they should not be spilled.
llvm-svn: 44960
2007-12-12 23:12:09 +00:00
Dan Gohman
0075ea1f5f Allow vector integer constants to be created with
SelectionDAG::getConstant, in the same way as vector floating-point
constants. This allows the legalize expansion code for @llvm.ctpop and
friends to be usable with vector types.

llvm-svn: 44954
2007-12-12 22:21:26 +00:00
Evan Cheng
ad3e7f3286 Use shuffles to implement insert_vector_elt for i32, i64, f32, and f64.
llvm-svn: 44929
2007-12-12 07:55:34 +00:00
Evan Cheng
af6ba4dfd4 Add a test case for -optimize-ext-uses.
llvm-svn: 44928
2007-12-12 07:54:08 +00:00
Evan Cheng
d36d69fe92 Lower a build_vector with all constants into a constpool load unless it can be done with a move to low part.
llvm-svn: 44921
2007-12-12 06:45:40 +00:00
Evan Cheng
f6c2838f36 - Improved v8i16 shuffle lowering. It now uses pshuflw and pshufhw as much as
possible before resorting to pextrw and pinsrw.
- Better codegen for v4i32 shuffles masquerading as v8i16 or v16i8 shuffles.
- Improves (i16 extract_vector_element 0) codegen by recognizing
  (i32 extract_vector_element 0) does not require a pextrw.

llvm-svn: 44836
2007-12-11 01:46:18 +00:00
Christopher Lamb
5c577eb543 Improve branch folding by recgonizing that explict successor relationships impact the value of fall-through choices.
llvm-svn: 44785
2007-12-10 07:24:06 +00:00
Gordon Henriksen
5d201e0bcc Adding a collector name attribute to Function in the IR. These
methods are new to Function:

  bool hasCollector() const;
  const std::string &getCollector() const;
  void setCollector(const std::string &);
  void clearCollector();

The assembly representation is as such:

  define void @f() gc "shadow-stack" { ...

The implementation uses an on-the-side table to map Functions to 
collector names, such that there is no overhead. A StringPool is 
further used to unique collector names, which are extremely
likely to be unique per process.

llvm-svn: 44769
2007-12-10 03:18:06 +00:00
Gordon Henriksen
64016be9ea Upgrading this test to 2.0 .ll syntax.
llvm-svn: 44738
2007-12-09 15:03:01 +00:00
Chris Lattner
e93a775a4d Fix a significant code quality regression I introduced on PPC64 quite
a while ago.  We now produce:

_foo:
	mflr r0
	std r0, 16(r1)
	ld r2, 16(r1)
	std r2, 0(r3)
	ld r0, 16(r1)
	mtlr r0
	blr 

instead of:

_foo:
	mflr r0
	std r0, 16(r1)
	lis r0, 0
	ori r0, r0, 16
	ldx r2, r1, r0
	std r2, 0(r3)
	ld r0, 16(r1)
	mtlr r0
	blr 

for:

void foo(void **X) {
  *X = __builtin_return_address(0);
}

on ppc64.

llvm-svn: 44701
2007-12-08 07:04:58 +00:00
Chris Lattner
e16166b78d implement __builtin_return_addr(0) on ppc.
llvm-svn: 44700
2007-12-08 06:59:59 +00:00
Evan Cheng
34c7b35135 Much improved v8i16 shuffles. (Step 1).
llvm-svn: 44676
2007-12-07 08:07:39 +00:00
Evan Cheng
8d1f8b2f27 New test case.
llvm-svn: 44672
2007-12-07 01:48:46 +00:00