1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 20:23:11 +01:00
Commit Graph

17 Commits

Author SHA1 Message Date
Chris Lattner
f96fb0c946 Don't print stuff out from the code generator. This broke the JIT horribly
last night. :)  bork!

llvm-svn: 17093
2004-10-17 17:40:50 +00:00
Chris Lattner
63e6bdd207 Rewrite support for cast uint -> FP. In particular, we used to compile this:
double %test(uint %X) {
        %tmp.1 = cast uint %X to double         ; <double> [#uses=1]
        ret double %tmp.1
}

into:

test:
        sub %ESP, 8
        mov %EAX, DWORD PTR [%ESP + 12]
        mov %ECX, 0
        mov DWORD PTR [%ESP], %EAX
        mov DWORD PTR [%ESP + 4], %ECX
        fild QWORD PTR [%ESP]
        add %ESP, 8
        ret

... which basically zero extends to 8 bytes, then does an fild for an
8-byte signed int.

Now we generate this:


test:
        sub %ESP, 4
        mov %EAX, DWORD PTR [%ESP + 8]
        mov DWORD PTR [%ESP], %EAX
        fild DWORD PTR [%ESP]
        shr %EAX, 31
        fadd DWORD PTR [.CPItest_0 + 4*%EAX]
        add %ESP, 4
        ret

        .section .rodata
        .align  4
.CPItest_0:
        .quad   5728578726015270912

This does a 32-bit signed integer load, then adds in an offset if the sign
bit of the integer was set.

It turns out that this is substantially faster than the preceeding sequence.
Consider this testcase:

unsigned a[2]={1,2};
volatile double G;

void main() {
    int i;
    for (i=0; i<100000000; ++i )
        G += a[i&1];
}

On zion (a P4 Xeon, 3Ghz), this patch speeds up the testcase from 2.140s
to 0.94s.

On apoc, an athlon MP 2100+, this patch speeds up the testcase from 1.72s
to 1.34s.

Note that the program takes 2.5s/1.97s on zion/apoc with GCC 3.3 -O3
-fomit-frame-pointer.

llvm-svn: 17083
2004-10-17 08:01:28 +00:00
Chris Lattner
2fdca0bc02 fold:
%X = and Y, constantint
  %Z = setcc %X, 0

instead of emitting:

        and %EAX, 3
        test %EAX, %EAX
        je .LBBfoo2_2   # UnifiedReturnBlock

We now emit:

        test %EAX, 3
        je .LBBfoo2_2   # UnifiedReturnBlock

This triggers 581 times on 176.gcc for example.

llvm-svn: 17080
2004-10-17 06:10:40 +00:00
Chris Lattner
ae2e5f4de1 Teach the X86 backend about unreachable and undef. Among other things, we
now compile:

'foo() {}' into "ret" instead of "mov EAX, 0; ret"

llvm-svn: 17049
2004-10-16 18:13:05 +00:00
Chris Lattner
25b5777485 Instruction select globals with offsets better. For example, on this test
case:

int C[100];
int foo() {
  return C[4];
}

We now codegen:

foo:
        mov %EAX, DWORD PTR [C + 16]
        ret

instead of:

foo:
        mov %EAX, OFFSET C
        mov %EAX, DWORD PTR [%EAX + 16]
        ret

Other impressive features may be coming later.

This patch is contributed by Jeff Cohen!

llvm-svn: 17011
2004-10-15 05:05:29 +00:00
Chris Lattner
1291307d27 Fix a major regression from the bugfix for 2004-10-08-SelectSetCCFold.llx,
which prevented setcc's from being folded into branches.  It appears that
conditional branchinst's CC operand is actually operand(2), not operand(0)
as we might expect. :(

llvm-svn: 16859
2004-10-08 22:24:31 +00:00
Chris Lattner
1ac1e54bf9 Fix bug: 2004-10-08-SelectSetCCFold.llx. Normally this is hidden by the
instcombine xform, which is why we didn't notice it before.

llvm-svn: 16840
2004-10-08 16:34:13 +00:00
Chris Lattner
82aa8544a5 Remove debugging code, fix encoding problem. This fixes the problems
the JIT had last night.

llvm-svn: 16766
2004-10-06 14:31:50 +00:00
Chris Lattner
b0e465f0cb Codegen signed mod by 2 or -2 more efficiently. Instead of generating:
t:
        mov %EDX, DWORD PTR [%ESP + 4]
        mov %ECX, 2
        mov %EAX, %EDX
        sar %EDX, 31
        idiv %ECX
        mov %EAX, %EDX
        ret

Generate:
t:
        mov %ECX, DWORD PTR [%ESP + 4]
***     mov %EAX, %ECX
        cdq
        and %ECX, 1
        xor %ECX, %EDX
        sub %ECX, %EDX
***     mov %EAX, %ECX
        ret

Note that the two marked moves are redundant, and should be eliminated by the
register allocator, but aren't.

Compare this to GCC, which generates:

t:
        mov     %eax, DWORD PTR [%esp+4]
        mov     %edx, %eax
        shr     %edx, 31
        lea     %ecx, [%edx+%eax]
        and     %ecx, -2
        sub     %eax, %ecx
        ret

or ICC 8.0, which generates:

t:
        movl      4(%esp), %ecx                                 #3.5
        movl      $-2147483647, %eax                            #3.25
        imull     %ecx                                          #3.25
        movl      %ecx, %eax                                    #3.25
        sarl      $31, %eax                                     #3.25
        addl      %ecx, %edx                                    #3.25
        subl      %edx, %eax                                    #3.25
        addl      %eax, %eax                                    #3.25
        negl      %eax                                          #3.25
        subl      %eax, %ecx                                    #3.25
        movl      %ecx, %eax                                    #3.25
        ret                                                     #3.25

We would be in great shape if not for the moves.

llvm-svn: 16763
2004-10-06 05:01:07 +00:00
Chris Lattner
09b6b3f514 Fix a scary bug with signed division by a power of two. We used to generate:
s:   ;; X / 4
        mov %EAX, DWORD PTR [%ESP + 4]
        mov %ECX, %EAX
        sar %ECX, 1
        shr %ECX, 30
        mov %EDX, %EAX
        add %EDX, %ECX
        sar %EAX, 2
        ret

When we really meant:

s:
        mov %EAX, DWORD PTR [%ESP + 4]
        mov %ECX, %EAX
        sar %ECX, 1
        shr %ECX, 30
        add %EAX, %ECX
        sar %EAX, 2
        ret

Hey, this also reduces register pressure too :)

llvm-svn: 16761
2004-10-06 04:19:43 +00:00
Chris Lattner
9258948b08 Codegen signed divides by 2 and -2 more efficiently. In particular
instead of:

s:   ;; X / 2
        movl 4(%esp), %eax
        movl %eax, %ecx
        shrl $31, %ecx
        movl %eax, %edx
        addl %ecx, %edx
        sarl $1, %eax
        ret

t:   ;; X / -2
        movl 4(%esp), %eax
        movl %eax, %ecx
        shrl $31, %ecx
        movl %eax, %edx
        addl %ecx, %edx
        sarl $1, %eax
        negl %eax
        ret

Emit:

s:
        movl 4(%esp), %eax
        cmpl $-2147483648, %eax
        sbbl $-1, %eax
        sarl $1, %eax
        ret

t:
        movl 4(%esp), %eax
        cmpl $-2147483648, %eax
        sbbl $-1, %eax
        sarl $1, %eax
        negl %eax
        ret

llvm-svn: 16760
2004-10-06 04:02:39 +00:00
Misha Brukman
e877aacbaa s/ISel/X86ISel/ to have unique class names for debugging via gdb because the C++
front-end in gcc does not mangle classes in anonymous namespaces correctly.

llvm-svn: 16469
2004-09-21 18:21:21 +00:00
Reid Spencer
c6a8d70cff Convert code to compile with vc7.1.
Patch contributed by Paolo Invernizzi. Thanks Paolo!

llvm-svn: 16368
2004-09-15 17:06:42 +00:00
Reid Spencer
c4abcbefb1 Changes For Bug 352
Move include/Config and include/Support into include/llvm/Config,
include/llvm/ADT and include/llvm/Support. From here on out, all LLVM
public header files must be under include/llvm/.

llvm-svn: 16137
2004-09-01 22:55:40 +00:00
Reid Spencer
59cb27bcdc Reduce the number of arguments in the instruction builder and make some
improvements on instruction selection that account for register and frame
index bases.

Patch contributed by Jeff Cohen. Thanks Jeff!

llvm-svn: 16110
2004-08-30 00:13:26 +00:00
Misha Brukman
61ff8a374f Fix file header as it has been renamed.
llvm-svn: 15239
2004-07-26 18:45:48 +00:00
Misha Brukman
ccd1114518 Renamed files to have the `X86' prefix for uniqueness purposes.
All CVS history was renamed, the *,v were copied over.  No worries.

llvm-svn: 15238
2004-07-26 18:43:11 +00:00