1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-25 22:12:57 +02:00
Commit Graph

15706 Commits

Author SHA1 Message Date
Reid Spencer
9caeb2165b Remove unneeded CHECK_FOR_ERROR invocations.
llvm-svn: 31110
2006-10-22 07:03:09 +00:00
Chris Lattner
25fd3d8b99 regenerate
llvm-svn: 31109
2006-10-22 06:08:13 +00:00
Chris Lattner
0bf0490e0c change keyword to datalayout
llvm-svn: 31108
2006-10-22 06:07:41 +00:00
Chris Lattner
9e59066602 Change keyword to datalayout
llvm-svn: 31107
2006-10-22 06:06:56 +00:00
Chris Lattner
50b3810d9a 3 Changes:
1. Better document what is going on here.
2. Only hack on one branch per iteration, making the results less conservative.
3. Handle the problematic case by marking edges executable instead of by
   playing with value lattice states.  This is far less pessimistic, and fixes
   SCCP/ipsccp-gvar.ll.

llvm-svn: 31106
2006-10-22 05:59:17 +00:00
Chris Lattner
9e7d74961b don't break infinite loops
llvm-svn: 31102
2006-10-21 06:11:43 +00:00
Chris Lattner
a9adb1e558 Implement support for branch reversal, fix a bug in branch analysis.
This provides stuff like:

        cmpw cr0, r15, r29
        mr r14, r15
-       bge cr0, LBB3_111       ;bb656
-       b LBB3_90       ;bb501
+       blt cr0, LBB3_90        ;bb501
 LBB3_111:      ;bb656
        lwz r18, 68(r1)

which is particularly good for dispatch group formation.

llvm-svn: 31101
2006-10-21 06:03:11 +00:00
Chris Lattner
210d322b1f Use branch reversal to do stuff like this:
call L_strcmp$stub
        testl %eax, %eax
-       jne LBB26_208   #cond_true6020
-       jmp LBB26_227   #bb7119
+       je LBB26_227    #bb7119
 LBB26_208:     #cond_true6020
        movl $l31_str14, 4(%esp)

        testl %eax, %eax
-       jne LBB26_704   #cond_true13042
-       jmp LBB26_713   #bb13151
+       je LBB26_713    #bb13151
 LBB26_704:     #cond_true13042
        movl $_str52, 4(%esp)

        cmpl 76(%ecx), %eax
-       jge LBB26_1628  #cond_false63.i.i
-       jmp LBB26_1769  #_Z8makeGridP13mrSurfaceListidiidd.exit.i
+       jl LBB26_1769   #_Z8makeGridP13mrSurfaceListidiidd.exit.i
 LBB26_1628:    #cond_false63.i.i
        movl $0, 48964(%esp)

llvm-svn: 31100
2006-10-21 05:54:00 +00:00
Chris Lattner
016325f336 Implement support for branch condition reversal.
llvm-svn: 31099
2006-10-21 05:52:40 +00:00
Chris Lattner
cf7f500cfd Transform code like:
jle FOO
  jmp BAR
BAR:

into:

  jle FOO
BAR:

... whoa!

llvm-svn: 31098
2006-10-21 05:43:30 +00:00
Chris Lattner
20bb8bfd45 Simplify code, no functionality change
llvm-svn: 31097
2006-10-21 05:42:09 +00:00
Chris Lattner
d3e583b2e1 implement support for inserting a cond branch
llvm-svn: 31096
2006-10-21 05:36:13 +00:00
Chris Lattner
b638d287f4 allow insertion of a conditional branch with fall-through
llvm-svn: 31095
2006-10-21 05:34:23 +00:00
Chris Lattner
d4cb8dc857 Three changes:
1. Remove a bunch of ifdef'd code.
2. When a block just contains an uncond branch, change all blocks branching
   to it to jump to the destination instead.
3. If branch analysis tells us some edges in the machinecfg are not actually
   possible, remove them.

#2 triggers a suprisingly large number of times.

llvm-svn: 31094
2006-10-21 05:08:28 +00:00
Chris Lattner
596c126372 update assert message
llvm-svn: 31093
2006-10-21 04:42:29 +00:00
Chris Lattner
52a4558cf2 Add an experimental cross-jumping implementation.
This is currently disabled by default and limited in several ways, but does
have a positive effect.

llvm-svn: 31090
2006-10-21 00:47:49 +00:00
Chris Lattner
cf4137d0b9 implement MachineOperand::isIdenticalTo
llvm-svn: 31088
2006-10-20 22:39:59 +00:00
Chris Lattner
ef1a8ce64a llvm-extract should remove module-level asm
llvm-svn: 31086
2006-10-20 21:35:41 +00:00
Chris Lattner
3fb2b87f17 bugfix
llvm-svn: 31074
2006-10-20 20:44:34 +00:00
Chris Lattner
6ea0134893 Fix an ugly problem in SCCP. This fixes Benchmarks/Misc-C++/mandel-text.cpp
llvm-svn: 31073
2006-10-20 20:19:08 +00:00
Chris Lattner
38ed7d9e49 Fix miscompilation of MallocBench/espresso which code review pointed out
but apparently didn't make it into the final patch.

llvm-svn: 31070
2006-10-20 18:20:21 +00:00
Chris Lattner
2f54afc364 Make flag and chain edges visually distinguishable from value edges in DOT
output.

llvm-svn: 31067
2006-10-20 18:06:09 +00:00
Chris Lattner
62a0f00312 Implement branch analysis/xform hooks required by the branch folding pass.
llvm-svn: 31065
2006-10-20 17:42:20 +00:00
Reid Spencer
d414793dbc For PR950:
This patch implements the first increment for the Signless Types feature.
All changes pertain to removing the ConstantSInt and ConstantUInt classes
in favor of just using ConstantInt.

llvm-svn: 31063
2006-10-20 07:07:24 +00:00
Devang Patel
b030b91f4a While creating mask, use 1ULL instead of 1.
llvm-svn: 31062
2006-10-20 01:16:56 +00:00
Chris Lattner
d9aa90ded4 Fix SimplifyCFG/2006-10-19-UncondDiv.ll by disabling a bad xform.
llvm-svn: 31061
2006-10-20 00:42:07 +00:00
Chris Lattner
5b4ca99562 add a method to determine whether evaluation of a constant can trap.
llvm-svn: 31059
2006-10-20 00:27:06 +00:00
Bill Wendling
edce5ede57 Partially in response to PR926: insert the newly created machine basic
blocks into the basic block list when lowering the switch inst. into a
binary tree of if-then statements. This allows the "visitSwitchCase" func
to allow for fall-through behavior.

llvm-svn: 31057
2006-10-19 21:46:38 +00:00
Devang Patel
880a9d823f It is OK to remove extra cast if operation is EQ/NE even though source
and destination sign may not match but other conditions are met.

llvm-svn: 31056
2006-10-19 20:59:13 +00:00
Devang Patel
88406a6e1e Typo Typo.
llvm-svn: 31055
2006-10-19 19:21:36 +00:00
Devang Patel
277990c79f Typo.
llvm-svn: 31054
2006-10-19 19:05:38 +00:00
Devang Patel
d9ade71cc7 Fix bug in PR454 resolution. Added new test case.
This fixes llvmAsmParser.cpp miscompile by llvm on PowerPC Darwin.

llvm-svn: 31053
2006-10-19 18:54:08 +00:00
Rafael Espindola
c08546401b use Pat to implement extloadi8 and extloadi16
llvm-svn: 31052
2006-10-19 17:05:03 +00:00
Rafael Espindola
35e92188e0 implement undef
llvm-svn: 31049
2006-10-19 13:45:00 +00:00
Rafael Espindola
f7a41f3ddd print common symbols
llvm-svn: 31048
2006-10-19 13:30:40 +00:00
Rafael Espindola
722266845b implement extloadi8 and extloadi16
llvm-svn: 31047
2006-10-19 12:45:04 +00:00
Rafael Espindola
17544c1c1d expand SIGN_EXTEND_INREG
llvm-svn: 31046
2006-10-19 12:06:50 +00:00
Rafael Espindola
6cc20c7950 expand brind so that we don't have to implement jump tables right now
llvm-svn: 31045
2006-10-19 10:56:43 +00:00
Reid Spencer
765ea73e59 Beef up the output from DiffFilesWithTolerance by setting the error code
to describe the difference being reported. This assists with understanding
differences an llvm-test and should help with bugpoint too.

llvm-svn: 31044
2006-10-18 20:23:52 +00:00
Jim Laskey
c06a0bc853 Add option for controlling inclusion of global AA.
llvm-svn: 31040
2006-10-18 19:08:31 +00:00
Chris Lattner
a86fff7583 fit in 80 cols
llvm-svn: 31039
2006-10-18 18:26:48 +00:00
Chris Lattner
fa22dc8afe add a note
llvm-svn: 31038
2006-10-18 17:04:09 +00:00
Rafael Espindola
1220d18e11 add blx
llvm-svn: 31037
2006-10-18 16:21:43 +00:00
Rafael Espindola
73e8f41749 add isTerminatortto b and bcond
llvm-svn: 31036
2006-10-18 16:20:57 +00:00
Jim Laskey
288c230cbd Use global info for alias analysis.
llvm-svn: 31035
2006-10-18 12:29:57 +00:00
Rafael Espindola
80c7461ada implement CallingConv::Fast as CallingConv::C
llvm-svn: 31034
2006-10-18 12:03:07 +00:00
Anton Korobeynikov
d1436ae792 Fixed mingw\cygwin linkonce linkage once again.
Added workaround for linker bug with linkonce sections.
Changed sections prefix to allow linker merge them
(PE loader doesn't like too much long-named sections :) )
All of this unbreaks libstdc++ on mingw32 allowing (small)
programs to be compiled, linked and run.

llvm-svn: 31033
2006-10-18 09:12:29 +00:00
Owen Anderson
de03eb21c2 Add support for the new "target data" information in .ll files. This provides
a better encoding of the targets data layout, rather than trying to guess it
from the endianness and pointersize like before.

Update the generated files.

llvm-svn: 31031
2006-10-18 02:21:48 +00:00
Owen Anderson
12374f766b Add support for the new "target data" information in .ll files. This provides
a better encoding of the targets data layout, rather than trying to guess it
from the endianness and pointersize like before.

llvm-svn: 31030
2006-10-18 02:21:12 +00:00
Chris Lattner
0f686ec438 set the ppc64 stack pointer right, dynamic alloca now works for ppc64
llvm-svn: 31028
2006-10-18 01:20:43 +00:00
Chris Lattner
6c403f7102 Expand alloca for ppc64
llvm-svn: 31027
2006-10-18 01:18:48 +00:00
Chris Lattner
cd00328fad Teach the branch folder to update debug info if it removes blocks with line
# notes in it.

llvm-svn: 31026
2006-10-17 23:17:27 +00:00
Chris Lattner
22e311fe77 add a method to remove a line # record.
llvm-svn: 31025
2006-10-17 23:16:42 +00:00
Chris Lattner
7353b07275 expose DWARF_LABEL opcode# so the branch folder can update debug info properly.
llvm-svn: 31024
2006-10-17 22:41:45 +00:00
Chris Lattner
dd39b9d358 Do not leak all of the SourceLineInfo objects. Do not bother mallocing each
one separately.

llvm-svn: 31022
2006-10-17 22:06:46 +00:00
Chris Lattner
8708a62f93 Trivial patch to speed up legalizing common i64 constants.
llvm-svn: 31020
2006-10-17 21:47:13 +00:00
Chris Lattner
bfbb76e103 Fix CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll
llvm-svn: 31019
2006-10-17 21:24:15 +00:00
Evan Cheng
3be6b15cd9 Fix printer for StoreSDNode.
llvm-svn: 31017
2006-10-17 21:18:26 +00:00
Evan Cheng
2113bdf34d Reflect MemOpAddrMode change; added a helper to create pre-indexed load.
llvm-svn: 31016
2006-10-17 21:14:32 +00:00
Rafael Espindola
58233f9db2 expand ISD::SDIV, ISD::UDIV, ISD::SREM and ISD::UREM
llvm-svn: 31014
2006-10-17 21:05:33 +00:00
Rafael Espindola
4e8824608e add the FPUnaryOp and DFPUnaryOp classes
llvm-svn: 31013
2006-10-17 20:45:22 +00:00
Rafael Espindola
fe0a9a6fe2 add FABSS and FABSD
llvm-svn: 31012
2006-10-17 20:33:13 +00:00
Anton Korobeynikov
c86485a397 Adding linkonce linkage codegeneration support for mingw32\cygwin
targets.

llvm-svn: 31011
2006-10-17 20:29:49 +00:00
Jim Laskey
7f16ca4872 Make it simplier to dump DAGs while in DAGCombiner. Remove a nasty optimization.
llvm-svn: 31009
2006-10-17 19:33:52 +00:00
Rafael Espindola
64f93033bc remove extra [] in stores
llvm-svn: 31008
2006-10-17 18:29:14 +00:00
Chris Lattner
ef7c2e1dc8 Enable deleting branches to successor blocks. With the previous patches,
branch folding can now compile stuff like this:

void foo(int W, int X, int Y, int Z) {
  if (W & 1) {
    for (; X;--X) bar();
  } else if (W & 2) {
    for (; Y;--Y) bar();
  } else if (W & 4) {
    for (; Z;--Z) bar();
  } else if (W & 8) {
    for (; W;--W) bar();
  }

  if (W) {
    bar();
  }
}

contrived testcase where loops exits all end up merging together.  To have
the loop merges be:

...
        cmplw cr0, r30, r27
        bne cr0, LBB1_14        ;bb38
LBB1_16:        ;cond_next48.loopexit
        mr r27, r29
LBB1_20:        ;cond_next48
        cmplwi cr0, r27, 0
        beq cr0, LBB1_22        ;UnifiedReturnBlock
...

instead of:


...
        cmplw cr0, r30, r27
        bne cr0, LBB1_14        ;bb38
LBB1_16:        ;cond_next48.loopexit
        mr r27, r29
        b LBB1_20       ;cond_next48
LBB1_17:        ;cond_next48.loopexit1
        b LBB1_20       ;cond_next48
LBB1_18:        ;cond_next48.loopexit2
        b LBB1_20       ;cond_next48
LBB1_19:        ;cond_next48.loopexit3
LBB1_20:        ;cond_next48
        cmplwi cr0, r27, 0
        beq cr0, LBB1_22        ;UnifiedReturnBlock
...


This is CodeGen/PowerPC/branch-opt.ll

llvm-svn: 31006
2006-10-17 18:16:40 +00:00
Chris Lattner
bf15d2f503 add support for inserting an uncond branch
llvm-svn: 31003
2006-10-17 18:06:55 +00:00
Rafael Espindola
47970f96ac initial implementation of addressing mode 5
llvm-svn: 31002
2006-10-17 18:04:53 +00:00
Jim Laskey
37679253ae Clean up interface to getGlobalLinkName.
llvm-svn: 31001
2006-10-17 17:17:24 +00:00
Chris Lattner
8464c7e3e1 Reenable this pass, fixing the bugs in it.
It now correctly deletes unreachable blocks and blocks that are empty.

llvm-svn: 31000
2006-10-17 17:13:52 +00:00
Evan Cheng
d9bec725a2 Make sure operand does have size and element type operands.
llvm-svn: 30999
2006-10-17 17:06:35 +00:00
Rafael Espindola
d5a6eaec14 add the immediate to the Offset in eliminateFrameIndex
llvm-svn: 30998
2006-10-17 14:34:02 +00:00
Jim Laskey
620df3ea5f Basic support for getGlobalLinkName.
llvm-svn: 30997
2006-10-17 13:41:07 +00:00
Rafael Espindola
31f59f5b94 add FSTD and FSTS
llvm-svn: 30996
2006-10-17 13:36:07 +00:00
Rafael Espindola
01400015fc add FCPYS and FCPYD
llvm-svn: 30995
2006-10-17 13:13:23 +00:00
Jim Laskey
d266d999cb Use S_debug for dwarf info.
llvm-svn: 30994
2006-10-17 11:30:57 +00:00
Evan Cheng
bf3e5a1d7a Proper fix.
llvm-svn: 30993
2006-10-17 00:24:49 +00:00
Evan Cheng
36ab7976a0 One more try.
llvm-svn: 30992
2006-10-16 23:44:08 +00:00
Reid Spencer
c6aa794a41 Undo Chris' last patch, it caused a regression.
llvm-svn: 30991
2006-10-16 23:08:08 +00:00
Evan Cheng
1cf362ec18 Unbreak x86-64 build.
llvm-svn: 30990
2006-10-16 22:53:28 +00:00
Evan Cheng
2d9318cff1 Be careful when looking through a vbit_convert. Optimizing this:
(vector_shuffle
  (vbitconvert (vbuildvector (copyfromreg v4f32), 1, v4f32), 4, f32),
  (undef, undef, undef, undef), (0, 0, 0, 0), 4, f32)
to the
  vbitconvert
is a very bad idea.

llvm-svn: 30989
2006-10-16 22:49:37 +00:00
Rafael Espindola
a156538e34 add fdivs e fdivd
llvm-svn: 30988
2006-10-16 21:50:04 +00:00
Rafael Espindola
4f61431679 expand ISD::SHL_PARTS, ISD::SRA_PARTS and ISD::SRL_PARTS
llvm-svn: 30987
2006-10-16 21:10:32 +00:00
Evan Cheng
04bec886a9 Added a X86CompilationCallback variant which saves XMM argument registers for targets with SSE.
llvm-svn: 30986
2006-10-16 21:01:55 +00:00
Evan Cheng
b4d8323e54 Proper fix for rdar://problem/4770604 Thanks to Stuart Hastings!
llvm-svn: 30985
2006-10-16 21:00:37 +00:00
Jim Laskey
06f4428abc Pass AliasAnalysis thru to DAGCombiner.
llvm-svn: 30984
2006-10-16 20:52:31 +00:00
Jim Laskey
5fe261bc52 Global name regression.
llvm-svn: 30982
2006-10-16 19:38:41 +00:00
Rafael Espindola
25fbeb5ec2 define the DFPBinOp class
llvm-svn: 30981
2006-10-16 18:39:22 +00:00
Rafael Espindola
57dc40a3a8 add the FPBinOp class
llvm-svn: 30980
2006-10-16 18:32:36 +00:00
Rafael Espindola
8a40de94f2 define the Addr1BinOp class
llvm-svn: 30979
2006-10-16 18:18:14 +00:00
Rafael Espindola
6554b1014d define the IntBinOp class and use it to implement the multiply instructions
llvm-svn: 30978
2006-10-16 17:57:20 +00:00
Rafael Espindola
71ae8c3d4a fix assembly syntax
llvm-svn: 30977
2006-10-16 17:38:12 +00:00
Rafael Espindola
d255bfb09b implement LDRB, LDRSB, LDRH and LDRSH
llvm-svn: 30976
2006-10-16 17:17:22 +00:00
Rafael Espindola
93359fa883 implement smull and umull
llvm-svn: 30975
2006-10-16 16:33:29 +00:00
Evan Cheng
ca5eaf4020 Avoid getting into an infinite loop when -disable-x86-shuffle-opti is specified.
llvm-svn: 30974
2006-10-16 06:36:00 +00:00
Evan Cheng
c73547a71d SelectScalarSSELoad should call CanBeFoldedBy as well.
llvm-svn: 30973
2006-10-16 06:34:55 +00:00
Chris Lattner
9ecd9bcde4 regenerate
llvm-svn: 30971
2006-10-15 23:27:25 +00:00
Chris Lattner
2ac6ca0f82 Produce a useful error message for Regression/Verifier/2006-10-15-AddrLabel.ll
llvm-svn: 30970
2006-10-15 23:26:46 +00:00
Chris Lattner
5a4d067b26 label* is not a valid type, add an assertion.
llvm-svn: 30968
2006-10-15 23:21:12 +00:00
Chris Lattner
fd983f91e7 fix a buggy check that accidentally disabled this xform
llvm-svn: 30967
2006-10-15 22:42:15 +00:00
Anton Korobeynikov
61d88967b9 Align stack size to DWORD boundary
llvm-svn: 30964
2006-10-14 20:53:35 +00:00
Rafael Espindola
c426cede28 expand ISD::BRCOND
llvm-svn: 30963
2006-10-14 17:59:54 +00:00
Rafael Espindola
38c602f658 fix some fp condition codes
use non trapping comparison instructions

llvm-svn: 30962
2006-10-14 13:42:53 +00:00
Jim Laskey
1070dfefba Tidy up after truncstore changes.
llvm-svn: 30961
2006-10-14 12:14:27 +00:00
Evan Cheng
bfd74c6d9c Debug tweak.
llvm-svn: 30959
2006-10-14 08:34:06 +00:00
Evan Cheng
6c8de88f88 Corrected load folding check. We need to start from the root of the sub-dag
being matched and ensure there isn't a non-direct path to the load (i.e. a
path that goes out of the sub-dag.)

llvm-svn: 30958
2006-10-14 08:33:25 +00:00
Chris Lattner
08aa96b824 Make sure that the node returned by SimplifySetCC is added to the worklist
so that it can be deleted if unused.

llvm-svn: 30955
2006-10-14 03:52:46 +00:00
Chris Lattner
a515f322f3 fold setcc of a setcc.
llvm-svn: 30953
2006-10-14 01:02:29 +00:00
Chris Lattner
25ad62d132 When SimplifySetCC was moved to the DAGCombiner, it was never removed from
SelectionDAG and it has since bitrotted.  Remove the copy from SelectionDAG.
Next, remove the constant folding piece of DAGCombiner::SimplifySetCC into
a new FoldSetCC method which can be used by getNode() and SimplifySetCC.

This fixes obscure bugs.

llvm-svn: 30952
2006-10-14 00:41:01 +00:00
Chris Lattner
c4be6ee8b8 disable this pass for now, it's causing issues
llvm-svn: 30951
2006-10-14 00:30:06 +00:00
Chris Lattner
1c35d27fc4 falling off the end of a function is ok with an unreachable instruction.
llvm-svn: 30950
2006-10-14 00:21:48 +00:00
Jim Laskey
bf50140aac Reduce the workload by not adding chain users to work list.
llvm-svn: 30948
2006-10-13 23:32:28 +00:00
Chris Lattner
9b1878a28c Fix a bug where we incorrectly turned '(X & 0) == 0' into '(X & 0) >> -1',
which is undefined.  "0" isn't a power of 2.

llvm-svn: 30947
2006-10-13 22:46:18 +00:00
Chris Lattner
b90d66b7da implement branch inspection/modification methods.
llvm-svn: 30946
2006-10-13 21:21:17 +00:00
Evan Cheng
fe5bb5dbe6 Merge ISD::TRUNCSTORE to ISD::STORE. Switch to using StoreSDNode.
llvm-svn: 30945
2006-10-13 21:14:26 +00:00
Chris Lattner
87d16e0ed9 add the branch folding pass as a late cleanup pass for all targets. For now
it just deletes empty MBB's.  Soon it will do more :)

llvm-svn: 30941
2006-10-13 20:45:56 +00:00
Chris Lattner
ae19ea4b2a disable some objectionable code, maybe we can bring this pass to life
llvm-svn: 30939
2006-10-13 20:43:10 +00:00
Chris Lattner
bea7a9de50 remove some dead code
llvm-svn: 30938
2006-10-13 20:40:42 +00:00
Chris Lattner
b22754853b add note
llvm-svn: 30937
2006-10-13 20:20:58 +00:00
Chris Lattner
14f18d4896 set isBarrier correctly
llvm-svn: 30936
2006-10-13 19:10:34 +00:00
Chris Lattner
a3f4e611a4 Correctly handle instruction separators.
llvm-svn: 30935
2006-10-13 17:56:02 +00:00
Chris Lattner
63ec35db20 Expose method and ivars for measuring inline asm length properly.
llvm-svn: 30934
2006-10-13 17:50:07 +00:00
Andrew Lenharth
254a2fbcd0 Move some warnings to debug mode.
llvm-svn: 30933
2006-10-13 17:38:22 +00:00
Rafael Espindola
1f85e7a2a1 add FNEGS and FNEGD
llvm-svn: 30932
2006-10-13 17:37:35 +00:00
Chris Lattner
5cd4ac0a79 Fix another dtor issue. The function local statics in this function were
being destroyed at inconvenient times.  Switch to using non-local ManagedStatic
objects, which actually also speeds up ConstRules::get.

llvm-svn: 30931
2006-10-13 17:22:21 +00:00
Rafael Espindola
042e61af25 add SBCS and SUBS
llvm-svn: 30930
2006-10-13 17:19:20 +00:00
Rafael Espindola
24d7976d1d implement calls to functions that return long
llvm-svn: 30929
2006-10-13 16:47:22 +00:00
Rafael Espindola
bd50a4d757 implement unordered floating point compares
llvm-svn: 30928
2006-10-13 13:14:59 +00:00
Jim Laskey
a053c74b99 Workaround for templates
llvm-svn: 30927
2006-10-13 13:02:19 +00:00
Jim Laskey
20feec042a Clean up dump.
llvm-svn: 30926
2006-10-13 13:01:34 +00:00
Chris Lattner
9476464f86 avoid a ctor/dtor issue with the ProgramName global.
llvm-svn: 30925
2006-10-13 00:06:24 +00:00
Chris Lattner
f48558b3b6 shrink anon-ns and mark stuff static. No functionality changes
llvm-svn: 30922
2006-10-12 22:09:17 +00:00
Chris Lattner
9777da8fb7 add a note
llvm-svn: 30921
2006-10-12 22:01:26 +00:00
Chris Lattner
70444d5663 Lower X%C into X/C+stuff. This allows the 'division by a constant' logic to
apply to rems as well as divs.  This fixes PR945 and speeds up ReedSolomon
from 14.57s to 10.90s (which is now faster than gcc).

It compiles CodeGen/X86/rem.ll into:

_test1:
        subl $4, %esp
        movl %esi, (%esp)
        movl $2155905153, %ecx
        movl 8(%esp), %esi
        movl %esi, %eax
        imull %ecx
        addl %esi, %edx
        movl %edx, %eax
        shrl $31, %eax
        sarl $7, %edx
        addl %eax, %edx
        imull $255, %edx, %eax
        subl %eax, %esi
        movl %esi, %eax
        movl (%esp), %esi
        addl $4, %esp
        ret
_test2:
        movl 4(%esp), %eax
        movl %eax, %ecx
        sarl $31, %ecx
        shrl $24, %ecx
        addl %eax, %ecx
        andl $4294967040, %ecx
        subl %ecx, %eax
        ret
_test3:
        subl $4, %esp
        movl %esi, (%esp)
        movl $2155905153, %ecx
        movl 8(%esp), %esi
        movl %esi, %eax
        mull %ecx
        shrl $7, %edx
        imull $255, %edx, %eax
        subl %eax, %esi
        movl %esi, %eax
        movl (%esp), %esi
        addl $4, %esp
        ret

instead of div/idiv instructions.

llvm-svn: 30920
2006-10-12 20:58:32 +00:00
Evan Cheng
61afb767eb Add RemoveDeadNode to remove a dead node and its (potentially) dead operands.
llvm-svn: 30916
2006-10-12 20:34:05 +00:00
Chris Lattner
e38ce54cc9 add a minor dag combine noticed when looking at PR945
llvm-svn: 30915
2006-10-12 20:23:19 +00:00
Evan Cheng
76d365ac84 Doh. This wasn't causing problems by luck.
llvm-svn: 30914
2006-10-12 19:13:59 +00:00
Evan Cheng
d07e2f081a Some X86ISD::CMP were created with wrong ValueType's.
llvm-svn: 30913
2006-10-12 19:12:56 +00:00
Chris Lattner
08684eac36 Fix massive resource leaks in the bytecode reader. Reading a bytecode file
with ParseBytecodeFile used to leak both a ModuleProvider (and related
bytecode parser stuff attached to it) AND a file descriptor, which was
never closed.  This prevented gccld/llvm-ld/llvm-link from linking together
apps with more that ~252 .bc files on darwin.

llvm-svn: 30912
2006-10-12 18:32:30 +00:00
Chris Lattner
b7ff3d59f7 mark call adjustments as modifying the SP
llvm-svn: 30911
2006-10-12 18:00:26 +00:00
Chris Lattner
80790cad34 adjcallstack up/down clobbers the sp
llvm-svn: 30910
2006-10-12 18:00:14 +00:00
Chris Lattner
e7ea164744 adjcallstackup/down clobbers the stack pointer
llvm-svn: 30909
2006-10-12 17:57:58 +00:00
Chris Lattner
e4e8893807 mark adjcallstack up/down as clobbering and using the SP
llvm-svn: 30908
2006-10-12 17:56:34 +00:00
Chris Lattner
b25677f5ca Move the Imp tblgen class from the X86 backend to common code.
llvm-svn: 30907
2006-10-12 17:49:27 +00:00
Chris Lattner
4d707afe22 restore my previous patch, now that the X86 backend bug has been fixed:
http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20061009/038518.html

llvm-svn: 30906
2006-10-12 17:45:38 +00:00
Chris Lattner
a678c5bac1 Mark ADJCALLSTACKUP/DOWN as clobbering ESP so that virtregmap will notice
that it can't assume ESP is unmodified across the instrs.

llvm-svn: 30905
2006-10-12 17:42:56 +00:00
Jim Laskey
388c9681ef D'oh - need to use the rigth kind of store.
llvm-svn: 30903
2006-10-12 15:22:24 +00:00
Evan Cheng
68aef84a57 Backing out Chris' last commit. It's breaking llvm-gcc bootstrapping.
It's turning:
        movl -24(%ebp), %esp
        subl $16, %esp
        movl -24(%ebp), %ecx
into
        movl -24(%ebp), %esp
        subl $16, %esp
        movl %esp, (%esp)

llvm-svn: 30902
2006-10-12 08:00:47 +00:00
Chris Lattner
fde6859201 fix compilation failure of smg2000
llvm-svn: 30900
2006-10-12 03:55:48 +00:00
Chris Lattner
d781da5f26 If we see a load from a stack slot into a physreg, consider it as providing
the stack slot.  This fixes PR943.

llvm-svn: 30898
2006-10-12 02:34:07 +00:00
Nick Lewycky
686cc9cacc Replace custom dispatch code with two uses of InstVisitor. Improves
compile-time performance.

llvm-svn: 30896
2006-10-12 02:02:44 +00:00
Chris Lattner
502246c4a6 Fold "zero extending vector loads" now that evan added the chain manip stuff.
This compiles both tests in X86/vec_ss_load_fold.ll into:

_test1:
        movss 4(%esp), %xmm0
        subss LCPI1_0, %xmm0
        mulss LCPI1_1, %xmm0
        minss LCPI1_2, %xmm0
        xorps %xmm1, %xmm1
        maxss %xmm1, %xmm0
        cvttss2si %xmm0, %eax
        andl $65535, %eax
        ret

instead of:

_test1:
        movss LCPI1_0, %xmm0
        movss 4(%esp), %xmm1
        subss %xmm0, %xmm1
        movss LCPI1_1, %xmm0
        mulss %xmm0, %xmm1
        movss LCPI1_2, %xmm0
        minss %xmm0, %xmm1
        xorps %xmm0, %xmm0
        maxss %xmm0, %xmm1
        cvttss2si %xmm1, %eax
        andl $65535, %eax
        ret

llvm-svn: 30894
2006-10-11 22:09:58 +00:00
Evan Cheng
95140c9c64 ComplexPatterns sse_load_f32 and sse_load_f64 returns in / out chain operands.
llvm-svn: 30892
2006-10-11 21:06:01 +00:00
Evan Cheng
ca66f49574 Add properties to ComplexPattern.
llvm-svn: 30891
2006-10-11 21:03:53 +00:00
Jim Laskey
eba756c1a7 Alias analysis of TRUNCSTORE.
llvm-svn: 30889
2006-10-11 18:55:16 +00:00
Jim Laskey
6efcd15c88 Typo
llvm-svn: 30884
2006-10-11 17:52:19 +00:00
Jim Laskey
4791a4ad14 Handle aliasing of loadext.
llvm-svn: 30883
2006-10-11 17:47:52 +00:00
Andrew Lenharth
55db991b47 Reduce the amount of state in the lowering code and drop old pattern ISel functions
llvm-svn: 30881
2006-10-11 16:24:51 +00:00
Jim Laskey
fd6218f8f5 Fix regression in combiner alias analysis.
llvm-svn: 30880
2006-10-11 13:47:09 +00:00
Evan Cheng
9b31a4d4ed Naming consistency.
llvm-svn: 30878
2006-10-11 07:10:22 +00:00
Chris Lattner
b9c1ea6dcc Use cute tblgen tricks to make zap handling more powerful. Specifically,
when the dag combiner simplifies an and mask, notice this and allow those bits
to be missing from the zap mask.

This compiles Alpha/zapnot4.ll into:

        sll $16,3,$0
        zapnot $0,3,$0
        ret $31,($26),1

instead of:

        ldah $0,1($31)
        lda $0,-8($0)
        sll $16,3,$1
        and $1,$0,$0
        ret $31,($26),1

It would be *really* nice to replace the hunk of code in the
AlphaISelDAGToDAG.cpp file that matches (and (srl (x, C), c2) into
(SRL (ZAPNOTi)) with a similar pattern, but I've spent enough time poking
at alpha.  Make andrew will do this.

llvm-svn: 30875
2006-10-11 05:13:56 +00:00
Andrew Lenharth
4b783303e5 Jimptables working again on alpha.
As a bonus, use the GOT node instead of the AlphaISD::GOT for internal stuff.

llvm-svn: 30873
2006-10-11 04:29:42 +00:00
Chris Lattner
95a8905db2 Remove dead/redundant instructions. These are handled by ZAPNOTi
llvm-svn: 30872
2006-10-11 04:12:39 +00:00
Chris Lattner
aa1741fc87 add two helper methods.
llvm-svn: 30869
2006-10-11 03:58:02 +00:00
Andrew Lenharth
7a79f1df15 This entry is done. switched to the gcc way of doing things.
llvm-svn: 30867
2006-10-11 01:48:03 +00:00
Evan Cheng
d6b419ecb0 FindModifiedNodeSlot needs to add LoadSDNode ivars to create proper SelectionDAGCSEMap ID.
llvm-svn: 30866
2006-10-11 01:47:58 +00:00
Chris Lattner
4be8276d27 This has apparently been fixed
llvm-svn: 30864
2006-10-11 01:44:46 +00:00
Rafael Espindola
46e7aceb1d uint <-> double conversion
llvm-svn: 30862
2006-10-10 20:38:57 +00:00
Evan Cheng
0d8a340a8f Also update getNodeLabel for LoadSDNode.
llvm-svn: 30861
2006-10-10 20:11:26 +00:00
Evan Cheng
a12747d2b4 SDNode::dump should also print out extension type and VT.
llvm-svn: 30860
2006-10-10 20:05:10 +00:00
Rafael Espindola
0112351e9a add fp sub
llvm-svn: 30859
2006-10-10 19:35:01 +00:00
Rafael Espindola
27d68a3c22 add double <-> int conversion
llvm-svn: 30858
2006-10-10 18:55:14 +00:00
Chris Lattner
e0734f522f Fix another bug in extload promotion.
llvm-svn: 30857
2006-10-10 18:54:19 +00:00
Rafael Espindola
413aa20bc8 compare doubles
llvm-svn: 30856
2006-10-10 16:33:47 +00:00
Rafael Espindola
b0719f1374 initial support for fp compares. Unordered compares not implemented yet
llvm-svn: 30854
2006-10-10 12:56:00 +00:00
Evan Cheng
070ae65fa8 Fix a bug introduced by my LOAD/LOADX changes.
llvm-svn: 30853
2006-10-10 07:51:21 +00:00
Evan Cheng
b2998e15f2 More isel time load folding checking for nodes that produce flag values.
See comment in CanBeFoldedBy() for detailed explanation.

llvm-svn: 30851
2006-10-10 01:46:56 +00:00
Evan Cheng
d1a37cb9dc Don't go too crazy with these AddComplexity. Try matching shufps with load
folding first.

llvm-svn: 30848
2006-10-09 21:42:15 +00:00
Evan Cheng
8f6c6b19e6 Don't convert to MOVLP if using shufps etc. may allow load folding.
llvm-svn: 30847
2006-10-09 21:39:25 +00:00
Evan Cheng
d22f3dd3ed Reflects ISD::LOAD / ISD::LOADX / LoadSDNode changes.
llvm-svn: 30844
2006-10-09 20:57:25 +00:00
Rafael Espindola
bae07b25d6 add float -> double and double -> float conversion
llvm-svn: 30835
2006-10-09 17:50:29 +00:00
Reid Spencer
2e9ba7166f Fix PR886:
The result of yyparse() was not being checked. When YYERROR or YYABORT is
called it causes yyparse() to return 1 to indicate the error. The code was
silently ignoring this situation because it previously expected either an
exception or a null ParserResult to indicate an error. The patch corrects
this situation.

llvm-svn: 30834
2006-10-09 17:36:59 +00:00
Chris Lattner
0c057ef048 Fix a bug pointed out by Zhongxing Xu
llvm-svn: 30831
2006-10-09 17:28:13 +00:00
Rafael Espindola
f917f096e2 add ADDS and ADCS
llvm-svn: 30830
2006-10-09 17:18:28 +00:00
Rafael Espindola
319b5e9c95 expand ISD::SELECT
llvm-svn: 30829
2006-10-09 16:28:33 +00:00
Rafael Espindola
fed11f040c add a note
llvm-svn: 30828
2006-10-09 14:18:33 +00:00
Rafael Espindola
aaeadcb6f5 expand ISD::EXTLOAD
llvm-svn: 30827
2006-10-09 14:13:40 +00:00
Rafael Espindola
1e16a7e972 most ARM targets are little endian
llvm-svn: 30826
2006-10-09 14:12:15 +00:00
Chris Lattner
9f980ec2a1 Implement SROA of unions with mixed pointers/integers in them. This implements
PR892 and Transforms/ScalarRepl/union-pointer.ll:test2

llvm-svn: 30825
2006-10-08 23:53:04 +00:00
Chris Lattner
f8afa75cef Implement Transforms/ScalarRepl/union-pointer.ll:test
llvm-svn: 30823
2006-10-08 23:28:04 +00:00
Chris Lattner
b0e0a23959 Eliminate more token factors by taking advantage of transitivity:
if TF depends on A and B, and A depends on B, TF just needs to depend on
A.  With Jim's alias-analysis stuff enabled, this compiles the testcase in
PR892 into:

__Z4test3Val:
        subl $44, %esp
        call L__Z3foov$stub
        movl %edx, 28(%esp)
        movl %eax, 32(%esp)
        movl %eax, 24(%esp)
        movl %edx, 36(%esp)
        movl 52(%esp), %ecx
        movl %ecx, 4(%esp)
        movl %eax, 8(%esp)
        movl %edx, 12(%esp)
        movl 48(%esp), %eax
        movl %eax, (%esp)
        call L__Z3bar3ValS_$stub
        addl $44, %esp
        ret

instead of:

__Z4test3Val:
        subl $44, %esp
        call L__Z3foov$stub
        movl %eax, 24(%esp)
        movl %edx, 28(%esp)
        movl 24(%esp), %eax
        movl %eax, 32(%esp)
        movl 28(%esp), %eax
        movl %eax, 36(%esp)
        movl 32(%esp), %eax
        movl 36(%esp), %ecx
        movl 52(%esp), %edx
        movl %edx, 4(%esp)
        movl %eax, 8(%esp)
        movl %ecx, 12(%esp)
        movl 48(%esp), %eax
        movl %eax, (%esp)
        call L__Z3bar3ValS_$stub
        addl $44, %esp
        ret

llvm-svn: 30821
2006-10-08 22:57:01 +00:00
Jim Laskey
9260b2f86e Combiner alias analysis passes Multisource (release-asserts.)
llvm-svn: 30818
2006-10-07 23:37:56 +00:00
Chris Lattner
3cd1d08ac6 completely disable folding of loads into scalar sse instructions and provide
a framework for doing it right.  This fixes
CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll.

Once X86DAGToDAGISel::SelectScalarSSELoad is implemented right, this task
will be done.

llvm-svn: 30817
2006-10-07 21:55:32 +00:00
Chris Lattner
a51aea84b8 convert packed FP add/sub/mul/div to use a multiclass.
llvm-svn: 30815
2006-10-07 21:17:13 +00:00
Chris Lattner
da75127cea one multiclass now defines all 8 variants of binary-scalar-sse-fp operations.
llvm-svn: 30814
2006-10-07 20:55:57 +00:00
Chris Lattner
8ce6993f53 Switch ADD/MUL/DIV/SUB scalarsse fp ops to a multiclass
llvm-svn: 30813
2006-10-07 20:35:44 +00:00
Chris Lattner
ec39f5bcd5 Random acts of shrinkage
llvm-svn: 30812
2006-10-07 19:49:05 +00:00
Chris Lattner
8e3aa16298 Convert pand/por/pxor to use multiclass
llvm-svn: 30811
2006-10-07 19:37:30 +00:00
Chris Lattner
33aecdebfc Convert some more instructions over to use a new multiclass.
Fix a bug where the asmstring for PSUBQrm was wrong.

llvm-svn: 30810
2006-10-07 19:34:33 +00:00
Chris Lattner
260659336a Fix a bug where PADDQrm printed paddd instead of paddq.
llvm-svn: 30809
2006-10-07 19:15:46 +00:00
Chris Lattner
0122bfac98 Add multiclass for SSE2 instructions that correspond to simple binops.
llvm-svn: 30808
2006-10-07 19:14:49 +00:00
Chris Lattner
db12d69657 rename:
PDI_binop_rm -> PDI_binop_rm_int
  PDI_binop_rmi -> PDI_binop_rmi_int

to make it clear that these are for use with intrinsics.

llvm-svn: 30807
2006-10-07 19:02:31 +00:00
Chris Lattner
36709eed45 Convert saturating PADD/PSUB's to use a multiclass
llvm-svn: 30806
2006-10-07 18:48:46 +00:00
Chris Lattner
d5d4378010 Convert PAVG*, PMADDWD, and PMUL* to use multiclasses.
llvm-svn: 30805
2006-10-07 18:39:00 +00:00
Chris Lattner
753ec9950a Fix typo in packsswb instr definition, where the load had the wrong type.
This allows us to use the multiclass for other packs.

llvm-svn: 30804
2006-10-07 18:23:58 +00:00
Rafael Espindola
38e9e2e01d implement FUITOS and FUITOD
llvm-svn: 30803
2006-10-07 14:24:52 +00:00
Rafael Espindola
90a24709fb implement FLDD
llvm-svn: 30802
2006-10-07 14:03:39 +00:00
Rafael Espindola
b8ce0f8bbd implement fadds, faddd, fmuls and fmuld
llvm-svn: 30801
2006-10-07 13:46:42 +00:00
Chris Lattner
59bf33e5e4 handle pmin/pmax with multiclasses
llvm-svn: 30800
2006-10-07 07:49:33 +00:00
Chris Lattner
2177d324c5 simplify pack and shift intrinsics with multiclasses
llvm-svn: 30797
2006-10-07 07:06:17 +00:00
Chris Lattner
31eb3af1a8 Use a multiclass to simplify 'SSE2 Integer comparison'
llvm-svn: 30796
2006-10-07 06:47:08 +00:00
Chris Lattner
7cde5d8820 move class defns close to uses to make it easier to read
llvm-svn: 30795
2006-10-07 06:33:36 +00:00
Chris Lattner
2842be4e37 simplify horizontal op definitions
llvm-svn: 30794
2006-10-07 06:31:41 +00:00
Chris Lattner
b3b659492b remove more unneeded type info
llvm-svn: 30793
2006-10-07 06:27:03 +00:00
Chris Lattner
8a2d78d3cf remove unneeded definitions and type info
llvm-svn: 30792
2006-10-07 06:19:41 +00:00
Chris Lattner
a75da38d99 remove some unneeded type info
llvm-svn: 30791
2006-10-07 06:17:43 +00:00
Chris Lattner
d704b454b9 simplify patterns by merging in operand info
llvm-svn: 30790
2006-10-07 05:50:25 +00:00
Chris Lattner
bf6419cef6 Factor operands into packed unary classes
llvm-svn: 30789
2006-10-07 05:47:20 +00:00
Chris Lattner
06c9aa41f1 remove dead/duplicate instructions
llvm-svn: 30788
2006-10-07 05:41:52 +00:00
Chris Lattner
72b130720d Pull operand info up into parent class for scalar sse intrinsics.
llvm-svn: 30787
2006-10-07 05:26:13 +00:00
Chris Lattner
cf13d058a3 convert the sole sd unary intrinsic to a multiclass for consistency
llvm-svn: 30786
2006-10-07 05:19:31 +00:00
Chris Lattner
67ea3292d2 pull operand string into the multiclass
llvm-svn: 30785
2006-10-07 05:13:26 +00:00
Chris Lattner
e234302d01 Remove RSQRTSS[rm] RCPSS[rm], which are dead.
Introduce SS_IntUnary, a multiclass to replace SS_Int[rm].

llvm-svn: 30784
2006-10-07 05:09:48 +00:00
Chris Lattner
22137d1891 eliminate redundancy
llvm-svn: 30783
2006-10-07 04:52:09 +00:00
Chris Lattner
f5758df6cd Fix a bug legalizing zero-extending i64 loads into 32-bit loads. The bottom
part was always forced to be sextload, even when we needed an zextload.

llvm-svn: 30782
2006-10-07 00:58:36 +00:00
Chris Lattner
f5b9b4a4b2 Set the jt section
llvm-svn: 30781
2006-10-06 22:52:33 +00:00
Chris Lattner
3f92c791b4 initialize ivar
llvm-svn: 30780
2006-10-06 22:52:08 +00:00
Chris Lattner
d5f5a433b2 If a target uses a GOT, put it in the jt data section, not the text
section.  This will fix alpha when Andrew implements
AlphaTargetMachine::getTargetLowering().

llvm-svn: 30779
2006-10-06 22:50:56 +00:00
Chris Lattner
2ca01febcf Alpha uses a got
llvm-svn: 30778
2006-10-06 22:46:51 +00:00
Chris Lattner
b5b96302f2 jump tables handle pic
llvm-svn: 30776
2006-10-06 22:32:29 +00:00
Chris Lattner
ad60994822 print labels even if a MBB doesn't have a corresponding LLVM BB, just don't
print the LLVM BB label.

llvm-svn: 30775
2006-10-06 21:28:17 +00:00
Rafael Espindola
a96c205e12 add optional input flag to FMRRD
llvm-svn: 30774
2006-10-06 20:33:26 +00:00
Rafael Espindola
54301ca490 add support for calling functions that return double
llvm-svn: 30771
2006-10-06 19:10:05 +00:00
Evan Cheng
6d15f83d46 80 col violation.
llvm-svn: 30770
2006-10-06 18:57:51 +00:00
Chris Lattner
399106d8f8 ugly codegen
llvm-svn: 30769
2006-10-06 17:39:34 +00:00
Chris Lattner
0d39b3a4cf Fix a miscompilation of:
long long foo(long long X) {
  return (long long)(signed char)(int)X;
}

Instead of:

_foo:
        extsb r2, r4
        srawi r3, r4, 31
        mr r4, r2
        blr

we now produce:

_foo:
        extsb r4, r4
        srawi r3, r4, 31
        blr

This fixes a miscompilation in ConstantFolding.cpp.

llvm-svn: 30768
2006-10-06 17:34:12 +00:00
Rafael Espindola
d870b158b3 fix some bugs affecting functions with no arguments
llvm-svn: 30767
2006-10-06 17:26:30 +00:00
Rafael Espindola
f35563ff66 fix the stack alignment
llvm-svn: 30766
2006-10-06 14:29:47 +00:00
Rafael Espindola
f679bdf121 add support for calling functions that have double arguments
llvm-svn: 30765
2006-10-06 12:50:22 +00:00
Evan Cheng
9ce3d493f0 Still need to support -mcpu=<> or cross compilation will fail. Doh.
llvm-svn: 30764
2006-10-06 09:17:41 +00:00
Evan Cheng
6fc0ae2136 Do away with CPU feature list. Just use CPUID to detect MMX, SSE, SSE2, SSE3, and 64-bit support.
llvm-svn: 30763
2006-10-06 08:21:07 +00:00
Evan Cheng
35a3337e1d It appears the inline asm in GetCpuIDAndInfo() may clobbers some registers if it isn't inlined (at < -O3). Force it to be inlined.
llvm-svn: 30762
2006-10-06 07:50:56 +00:00
Chris Lattner
5fc3bb074c MachineBasicBlock::splice was incorrectly updating parent pointers on
instructions.

llvm-svn: 30760
2006-10-06 01:12:44 +00:00
Evan Cheng
275825195a Make use of getStore().
llvm-svn: 30759
2006-10-05 23:01:46 +00:00
Evan Cheng
c9e079d0c1 Add getStore() helper function to create ISD::STORE nodes.
llvm-svn: 30758
2006-10-05 22:57:11 +00:00
Chris Lattner
eca9897bd5 Don't crash if an MBB doesn't have an LLVM BB
llvm-svn: 30757
2006-10-05 21:40:14 +00:00
Rafael Espindola
2e4743b6d1 use a const ref for passing the vector to ArgumentLayout
llvm-svn: 30756
2006-10-05 17:46:48 +00:00
Rafael Espindola
f0e4950ef4 implement a ArgumentLayout class to factor code common to LowerFORMAL_ARGUMENTS and LowerCALL
implement FMDRR
add support for f64 function arguments

llvm-svn: 30754
2006-10-05 16:48:49 +00:00
Jim Laskey
3f9f064fd1 Alias analysis code clean ups.
llvm-svn: 30753
2006-10-05 15:07:25 +00:00
Chris Lattner
513ba43053 add a new SimplifyDemandedVectorElts method, which works similarly to
SimplifyDemandedBits.  The idea is that some operations can be simplified if
not all of the computed elements are needed.  Some targets (like x86) have a
large number of intrinsics that operate on a single element, but pass other
elts through unmodified.  If those other elements are not needed, the
intrinsics can be simplified to scalar operations, and insertelement ops can
be removed.

This turns (f.e.):

ushort %Convert_sse(float %f) {
        %tmp = insertelement <4 x float> undef, float %f, uint 0                ; <<4 x float>> [#uses=1]
        %tmp10 = insertelement <4 x float> %tmp, float 0.000000e+00, uint 1             ; <<4 x float>> [#uses=1]
        %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, uint 2           ; <<4 x float>> [#uses=1]
        %tmp12 = insertelement <4 x float> %tmp11, float 0.000000e+00, uint 3           ; <<4 x float>> [#uses=1]
        %tmp28 = tail call <4 x float> %llvm.x86.sse.sub.ss( <4 x float> %tmp12, <4 x float> < float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )               ; <<4 x float>> [#uses=1]
        %tmp37 = tail call <4 x float> %llvm.x86.sse.mul.ss( <4 x float> %tmp28, <4 x float> < float 5.000000e-01, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )               ; <<4 x float>> [#uses=1]
        %tmp48 = tail call <4 x float> %llvm.x86.sse.min.ss( <4 x float> %tmp37, <4 x float> < float 6.553500e+04, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )               ; <<4 x float>> [#uses=1]
        %tmp59 = tail call <4 x float> %llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> zeroinitializer )          ; <<4 x float>> [#uses=1]
        %tmp = tail call int %llvm.x86.sse.cvttss2si( <4 x float> %tmp59 )              ; <int> [#uses=1]
        %tmp69 = cast int %tmp to ushort                ; <ushort> [#uses=1]
        ret ushort %tmp69
}

into:

ushort %Convert_sse(float %f) {
entry:
        %tmp28 = sub float %f, 1.000000e+00             ; <float> [#uses=1]
        %tmp37 = mul float %tmp28, 5.000000e-01         ; <float> [#uses=1]
        %tmp375 = insertelement <4 x float> undef, float %tmp37, uint 0         ; <<4 x float>> [#uses=1]
        %tmp48 = tail call <4 x float> %llvm.x86.sse.min.ss( <4 x float> %tmp375, <4 x float> < float 6.553500e+04, float undef, float undef, float undef > )           ; <<4 x float>> [#uses=1]
        %tmp59 = tail call <4 x float> %llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> < float 0.000000e+00, float undef, float undef, float undef > )            ; <<4 x float>> [#uses=1]
        %tmp = tail call int %llvm.x86.sse.cvttss2si( <4 x float> %tmp59 )              ; <int> [#uses=1]
        %tmp69 = cast int %tmp to ushort                ; <ushort> [#uses=1]
        ret ushort %tmp69
}

which improves codegen from:

_Convert_sse:
        movss LCPI1_0, %xmm0
        movss 4(%esp), %xmm1
        subss %xmm0, %xmm1
        movss LCPI1_1, %xmm0
        mulss %xmm0, %xmm1
        movss LCPI1_2, %xmm0
        minss %xmm0, %xmm1
        xorps %xmm0, %xmm0
        maxss %xmm0, %xmm1
        cvttss2si %xmm1, %eax
        andl $65535, %eax
        ret

to:

_Convert_sse:
        movss 4(%esp), %xmm0
        subss LCPI1_0, %xmm0
        mulss LCPI1_1, %xmm0
        movss LCPI1_2, %xmm1
        minss %xmm1, %xmm0
        xorps %xmm1, %xmm1
        maxss %xmm1, %xmm0
        cvttss2si %xmm0, %eax
        andl $65535, %eax
        ret


This is just a first step, it can be extended in many ways.  Testcase here:
Transforms/InstCombine/vec_demanded_elts.ll

llvm-svn: 30752
2006-10-05 06:55:50 +00:00