1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-24 21:42:54 +02:00
Commit Graph

14573 Commits

Author SHA1 Message Date
Nate Begeman
7b4fe83ba8 Implement floating point select for lt, gt, le, ge using the powerpc fsel
instruction.

Now, rather than emitting the following loop out of bisect:
.LBB_main_19:	; no_exit.0.i
	rlwinm r3, r2, 3, 0, 28
	lfdx f1, r3, r27
	addis r3, r30, ha16(.CPI_main_1-"L00000$pb")
	lfd f2, lo16(.CPI_main_1-"L00000$pb")(r3)
	fsub f2, f2, f1
	addis r3, r30, ha16(.CPI_main_1-"L00000$pb")
	lfd f4, lo16(.CPI_main_1-"L00000$pb")(r3)
	fcmpu cr0, f1, f4
	bge .LBB_main_64	; no_exit.0.i
.LBB_main_63:	; no_exit.0.i
	b .LBB_main_65	; no_exit.0.i
.LBB_main_64:	; no_exit.0.i
	fmr f2, f1
.LBB_main_65:	; no_exit.0.i
	addi r3, r2, 1
	rlwinm r3, r3, 3, 0, 28
	lfdx f1, r3, r27
	addis r3, r30, ha16(.CPI_main_1-"L00000$pb")
	lfd f4, lo16(.CPI_main_1-"L00000$pb")(r3)
	fsub f4, f4, f1
	addis r3, r30, ha16(.CPI_main_1-"L00000$pb")
	lfd f5, lo16(.CPI_main_1-"L00000$pb")(r3)
	fcmpu cr0, f1, f5
	bge .LBB_main_67	; no_exit.0.i
.LBB_main_66:	; no_exit.0.i
	b .LBB_main_68	; no_exit.0.i
.LBB_main_67:	; no_exit.0.i
	fmr f4, f1
.LBB_main_68:	; no_exit.0.i
	fadd f1, f2, f4
	addis r3, r30, ha16(.CPI_main_2-"L00000$pb")
	lfd f2, lo16(.CPI_main_2-"L00000$pb")(r3)
	fmul f1, f1, f2
	rlwinm r3, r2, 3, 0, 28
	lfdx f2, r3, r28
	fadd f4, f2, f1
	fcmpu cr0, f4, f0
	bgt .LBB_main_70	; no_exit.0.i
.LBB_main_69:	; no_exit.0.i
	b .LBB_main_71	; no_exit.0.i
.LBB_main_70:	; no_exit.0.i
	fmr f0, f4
.LBB_main_71:	; no_exit.0.i
	fsub f1, f2, f1
	addi r2, r2, -1
	fcmpu cr0, f1, f3
	blt .LBB_main_73	; no_exit.0.i
.LBB_main_72:	; no_exit.0.i
	b .LBB_main_74	; no_exit.0.i
.LBB_main_73:	; no_exit.0.i
	fmr f3, f1
.LBB_main_74:	; no_exit.0.i
	cmpwi cr0, r2, -1
	fmr f16, f0
	fmr f17, f3
	bgt .LBB_main_19	; no_exit.0.i

We emit this instead:
.LBB_main_19:	; no_exit.0.i
	rlwinm r3, r2, 3, 0, 28
	lfdx f1, r3, r27
	addis r3, r30, ha16(.CPI_main_1-"L00000$pb")
	lfd f2, lo16(.CPI_main_1-"L00000$pb")(r3)
	fsub f2, f2, f1
	fsel f1, f1, f1, f2
	addi r3, r2, 1
	rlwinm r3, r3, 3, 0, 28
	lfdx f2, r3, r27
	addis r3, r30, ha16(.CPI_main_1-"L00000$pb")
	lfd f4, lo16(.CPI_main_1-"L00000$pb")(r3)
	fsub f4, f4, f2
	fsel f2, f2, f2, f4
	fadd f1, f1, f2
	addis r3, r30, ha16(.CPI_main_2-"L00000$pb")
	lfd f2, lo16(.CPI_main_2-"L00000$pb")(r3)
	fmul f1, f1, f2
	rlwinm r3, r2, 3, 0, 28
	lfdx f2, r3, r28
	fadd f4, f2, f1
	fsub f5, f0, f4
	fsel f0, f5, f0, f4
	fsub f1, f2, f1
	addi r2, r2, -1
	fsub f2, f1, f3
	fsel f3, f2, f3, f1
	cmpwi cr0, r2, -1
	fmr f16, f0
	fmr f17, f3
	bgt .LBB_main_19	; no_exit.0.i

llvm-svn: 16764
2004-10-06 09:53:04 +00:00
Chris Lattner
b0e465f0cb Codegen signed mod by 2 or -2 more efficiently. Instead of generating:
t:
        mov %EDX, DWORD PTR [%ESP + 4]
        mov %ECX, 2
        mov %EAX, %EDX
        sar %EDX, 31
        idiv %ECX
        mov %EAX, %EDX
        ret

Generate:
t:
        mov %ECX, DWORD PTR [%ESP + 4]
***     mov %EAX, %ECX
        cdq
        and %ECX, 1
        xor %ECX, %EDX
        sub %ECX, %EDX
***     mov %EAX, %ECX
        ret

Note that the two marked moves are redundant, and should be eliminated by the
register allocator, but aren't.

Compare this to GCC, which generates:

t:
        mov     %eax, DWORD PTR [%esp+4]
        mov     %edx, %eax
        shr     %edx, 31
        lea     %ecx, [%edx+%eax]
        and     %ecx, -2
        sub     %eax, %ecx
        ret

or ICC 8.0, which generates:

t:
        movl      4(%esp), %ecx                                 #3.5
        movl      $-2147483647, %eax                            #3.25
        imull     %ecx                                          #3.25
        movl      %ecx, %eax                                    #3.25
        sarl      $31, %eax                                     #3.25
        addl      %ecx, %edx                                    #3.25
        subl      %edx, %eax                                    #3.25
        addl      %eax, %eax                                    #3.25
        negl      %eax                                          #3.25
        subl      %eax, %ecx                                    #3.25
        movl      %ecx, %eax                                    #3.25
        ret                                                     #3.25

We would be in great shape if not for the moves.

llvm-svn: 16763
2004-10-06 05:01:07 +00:00
Chris Lattner
c959314701 Really fix FreeBSD, which apparently doesn't tolerate the extern.
Thanks to Jeff Cohen for pointing out my goof.

llvm-svn: 16762
2004-10-06 04:21:52 +00:00
Chris Lattner
09b6b3f514 Fix a scary bug with signed division by a power of two. We used to generate:
s:   ;; X / 4
        mov %EAX, DWORD PTR [%ESP + 4]
        mov %ECX, %EAX
        sar %ECX, 1
        shr %ECX, 30
        mov %EDX, %EAX
        add %EDX, %ECX
        sar %EAX, 2
        ret

When we really meant:

s:
        mov %EAX, DWORD PTR [%ESP + 4]
        mov %ECX, %EAX
        sar %ECX, 1
        shr %ECX, 30
        add %EAX, %ECX
        sar %EAX, 2
        ret

Hey, this also reduces register pressure too :)

llvm-svn: 16761
2004-10-06 04:19:43 +00:00
Chris Lattner
9258948b08 Codegen signed divides by 2 and -2 more efficiently. In particular
instead of:

s:   ;; X / 2
        movl 4(%esp), %eax
        movl %eax, %ecx
        shrl $31, %ecx
        movl %eax, %edx
        addl %ecx, %edx
        sarl $1, %eax
        ret

t:   ;; X / -2
        movl 4(%esp), %eax
        movl %eax, %ecx
        shrl $31, %ecx
        movl %eax, %edx
        addl %ecx, %edx
        sarl $1, %eax
        negl %eax
        ret

Emit:

s:
        movl 4(%esp), %eax
        cmpl $-2147483648, %eax
        sbbl $-1, %eax
        sarl $1, %eax
        ret

t:
        movl 4(%esp), %eax
        cmpl $-2147483648, %eax
        sbbl $-1, %eax
        sarl $1, %eax
        negl %eax
        ret

llvm-svn: 16760
2004-10-06 04:02:39 +00:00
Chris Lattner
acd213fba3 Add some new instructions. Fix the asm string for sbb32rr
llvm-svn: 16759
2004-10-06 04:01:02 +00:00
Chris Lattner
5f0c904ec0 Reduce code growth implied by the tail duplication pass by not duplicating
an instruction if it can be hoisted to a common dominator of the block.
This implements: test/Regression/Transforms/TailDup/MergeTest.ll

llvm-svn: 16758
2004-10-06 03:27:37 +00:00
Chris Lattner
7a3968b6c0 When tail duplicating these functions, the add instruction should not be
duplicated, even though the block it is in is duplicated.

llvm-svn: 16757
2004-10-06 03:26:38 +00:00
Chris Lattner
b2e8fdc431 FreeBSD uses GCC. Patch contributed by Jeff Cohen!
llvm-svn: 16756
2004-10-06 03:15:44 +00:00
Chris Lattner
231c7ddfee Fix the path to the fixinc'd headers. Patch contributed by Jeff Cohen!
llvm-svn: 16755
2004-10-06 03:13:47 +00:00
Brian Gaeke
38641114a3 Must include sys/stat.h before declaring a 'struct stat'
llvm-svn: 16728
2004-10-05 18:46:59 +00:00
Brian Gaeke
109b8c6ede Build BFtoLLVM example front-end by default
llvm-svn: 16719
2004-10-05 18:05:53 +00:00
Brian Gaeke
bbe7875896 Add BFtoLLVM example front end
llvm-svn: 16714
2004-10-05 18:05:25 +00:00
Chris Lattner
6023408a6e Make sure the const bit gets inherited correctly when linking declarations
of disagreeing constness.  This fixes
test/Regression/Linker/ConstantGlobals[123].ll

llvm-svn: 16692
2004-10-05 02:28:11 +00:00
Chris Lattner
c8991a1e98 Another testcase for constness linkage
llvm-svn: 16691
2004-10-05 02:16:01 +00:00
Chris Lattner
84fddeb7a6 Testcase to ensure that the 'constant' flag follows the definition when there
is a question.

llvm-svn: 16690
2004-10-05 02:12:20 +00:00
Reid Spencer
bba26329ab Adjust sys/stat.h inclusion so its only for SunOS.
llvm-svn: 16686
2004-10-05 00:56:46 +00:00
Tanya Lattner
7198953962 Added a couple of includes to get this to compile on Sparc.
llvm-svn: 16685
2004-10-05 00:51:26 +00:00
Chris Lattner
6547451f32 Solaris doesn't have MAP_FILE.
llvm-svn: 16682
2004-10-05 00:46:21 +00:00
Chris Lattner
52a451f4b8 Bug fixed
llvm-svn: 16671
2004-10-05 00:23:02 +00:00
Chris Lattner
6df035f866 New testcase for PR450
llvm-svn: 16670
2004-10-05 00:18:21 +00:00
Reid Spencer
010dec5772 Add checks for the ZLIB and BZIP2 header files, not just the libraries.
llvm-svn: 16669
2004-10-04 22:05:53 +00:00
Chris Lattner
4463fcc2f8 Fix #include flavor
llvm-svn: 16658
2004-10-04 18:10:18 +00:00
Reid Spencer
5638e9632d Move the warning about no compression library down to the bottom, away
from the fray, so it gets noticed. This commit is made without the
corresponding configure script commit because it doesn't affect
functionality and we don't want to force everyone into another reconfigure

llvm-svn: 16657
2004-10-04 18:02:55 +00:00
Reid Spencer
5c3b0b7e23 Fix typo in makefile variable name that prevents zlib from being recognized
llvm-svn: 16656
2004-10-04 17:49:19 +00:00
Reid Spencer
fdf5f0f13a Add HAVE_BZIP2 and HAVE_ZLIB
llvm-svn: 16655
2004-10-04 17:48:37 +00:00
Reid Spencer
079b225788 Excise the ill-advised RLCOMP compression algorithm and simply leave the
previously temporary NULLCOMP implementation that merely copies the data
verbatim without compression. Also, don't warn if there's no compression
library as that is taken care of during configuration time.

llvm-svn: 16654
2004-10-04 17:45:44 +00:00
Misha Brukman
fd3ade5075 Add example 'abstract' architectures for LLI: MIX, MMIX, and DLX
llvm-svn: 16653
2004-10-04 17:36:35 +00:00
Reid Spencer
49089d64c2 Add a context for the callback so different compression scenarios can be
distinguished. Tidy up documentation.  Thanks, Chris.

llvm-svn: 16652
2004-10-04 17:29:25 +00:00
Reid Spencer
024857a516 Minor corrections suggested by Chris' ever-watchful eye.
llvm-svn: 16651
2004-10-04 17:26:26 +00:00
Chris Lattner
9f6c72d660 Fix build if not HAVE_BZIP2
llvm-svn: 16650
2004-10-04 16:33:25 +00:00
Reid Spencer
da2e8b9943 First version of the MappedFile abstraction for operating system idependent
mapping of files. This first version uses mmap where its available. The
class needs to implement an alternate mechanism based on malloc'd memory
and file reading/writing for platforms without virtual memory.

llvm-svn: 16649
2004-10-04 11:08:32 +00:00
Reid Spencer
d2bedc512d First version of a support utility to provide generalized compression in
LLVM that handles availability and unavailability of bzip2 and zlib.

llvm-svn: 16648
2004-10-04 10:49:41 +00:00
Chris Lattner
0228f228df * Prune #includes
* Update comments
* Rearrange code a bit
* Finally ELIMINATE the GAS workaround emitter for Intel mode.  woot!

llvm-svn: 16647
2004-10-04 07:31:08 +00:00
Chris Lattner
581948c8f6 Add support for emitting AT&T style .s files, and make it the default. Users
may now choose their output format with the -x86-asm-syntax={intel|att} flag.

llvm-svn: 16646
2004-10-04 07:24:48 +00:00
Chris Lattner
5959f4a108 Convert some missed patterns to support AT&T style
llvm-svn: 16645
2004-10-04 07:23:07 +00:00
Chris Lattner
a05d9f53bb Apparently the GNU assembler has a HUGE hack to be compatible with really
old and broken AT&T syntax assemblers.  The problem with this hack is that
*SOME* forms of the fdiv and fsub instructions have the 'r' bit inverted.
This was a real pain to figure out, but is trivially easy to support: thus
we are now bug compatible with gas and gcc.

llvm-svn: 16644
2004-10-04 07:08:46 +00:00
Reid Spencer
ab7aea0879 Provide support for auto-detection and use of compression libraries.
llvm-svn: 16643
2004-10-04 07:05:07 +00:00
Chris Lattner
08098895db Fix incorrect suffix
llvm-svn: 16642
2004-10-04 05:20:16 +00:00
Chris Lattner
c2fc9597bd Fix some more missed suffixes and swapped operands
llvm-svn: 16641
2004-10-04 01:38:10 +00:00
Chris Lattner
7b15a84728 Add missing suffixes to FP instructions for AT&T mode
llvm-svn: 16640
2004-10-04 00:43:31 +00:00
Chris Lattner
8d44dcca97 Add support for the -x86-asm-syntax flag, which can be used to choose between
Intel and AT&T style assembly language.  The ultimate goal of this is to
eliminate the GasBugWorkaroundEmitter class, but for now AT&T style emission
is not fully operational.

llvm-svn: 16639
2004-10-03 20:36:57 +00:00
Chris Lattner
94780713a8 Add support to the instruction patterns for AT&T style output, which will
hopefully lead to the death of the 'GasBugWorkaroundEmitter'.  This also
includes changes to wrap the whole file to 80 columns! Woot! :)

Note that the AT&T style output has not been tested at all.

llvm-svn: 16638
2004-10-03 20:35:00 +00:00
Chris Lattner
de7199b94f Correctly parse variant notation
llvm-svn: 16637
2004-10-03 20:19:02 +00:00
Chris Lattner
9a649a5c05 Add initial support for variants. This just parses the new format, no
functionality is added

llvm-svn: 16636
2004-10-03 19:34:31 +00:00
Chris Lattner
30b5b79aa0 Add initial support for variants
llvm-svn: 16635
2004-10-03 19:34:18 +00:00
Reid Spencer
43e2f0492a Add checks for bzip2 and libz for use with the Compression concept for
lib/System and the compressing llvm archiver.

llvm-svn: 16634
2004-10-02 08:50:58 +00:00
Chris Lattner
815b635639 Do not repeat the map lookup
llvm-svn: 16633
2004-10-01 23:16:43 +00:00
Chris Lattner
c521544a32 When a virtual register is folded into an instruction, keep track of whether
it was a use, def, or both.  This allows us to be less pessimistic in our
analysis of them.  In practice, this doesn't make a big difference, but it
doesn't hurt either.

llvm-svn: 16632
2004-10-01 23:15:36 +00:00
Chris Lattner
0e59bfe3a7 Add project
llvm-svn: 16631
2004-10-01 22:25:28 +00:00