1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 11:42:57 +01:00
Commit Graph

69270 Commits

Author SHA1 Message Date
Chris Lattner
74ed5d30ca implement an instcombine xform that canonicalizes casts outside of and-with-constant operations.
This fixes rdar://8808586 which observed that we used to compile:


union xy {
        struct x { _Bool b[15]; } x;
        __attribute__((packed))
        struct y {
                __attribute__((packed)) unsigned long b0to7;
                __attribute__((packed)) unsigned int b8to11;
                __attribute__((packed)) unsigned short b12to13;
                __attribute__((packed)) unsigned char b14;
        } y;
};

struct x
foo(union xy *xy)
{
        return xy->x;
}

into:

_foo:                                   ## @foo
	movq	(%rdi), %rax
	movabsq	$1095216660480, %rcx    ## imm = 0xFF00000000
	andq	%rax, %rcx
	movabsq	$-72057594037927936, %rdx ## imm = 0xFF00000000000000
	andq	%rax, %rdx
	movzbl	%al, %esi
	orq	%rdx, %rsi
	movq	%rax, %rdx
	andq	$65280, %rdx            ## imm = 0xFF00
	orq	%rsi, %rdx
	movq	%rax, %rsi
	andq	$16711680, %rsi         ## imm = 0xFF0000
	orq	%rdx, %rsi
	movl	%eax, %edx
	andl	$-16777216, %edx        ## imm = 0xFFFFFFFFFF000000
	orq	%rsi, %rdx
	orq	%rcx, %rdx
	movabsq	$280375465082880, %rcx  ## imm = 0xFF0000000000
	movq	%rax, %rsi
	andq	%rcx, %rsi
	orq	%rdx, %rsi
	movabsq	$71776119061217280, %r8 ## imm = 0xFF000000000000
	andq	%r8, %rax
	orq	%rsi, %rax
	movzwl	12(%rdi), %edx
	movzbl	14(%rdi), %esi
	shlq	$16, %rsi
	orl	%edx, %esi
	movq	%rsi, %r9
	shlq	$32, %r9
	movl	8(%rdi), %edx
	orq	%r9, %rdx
	andq	%rdx, %rcx
	movzbl	%sil, %esi
	shlq	$32, %rsi
	orq	%rcx, %rsi
	movl	%edx, %ecx
	andl	$-16777216, %ecx        ## imm = 0xFFFFFFFFFF000000
	orq	%rsi, %rcx
	movq	%rdx, %rsi
	andq	$16711680, %rsi         ## imm = 0xFF0000
	orq	%rcx, %rsi
	movq	%rdx, %rcx
	andq	$65280, %rcx            ## imm = 0xFF00
	orq	%rsi, %rcx
	movzbl	%dl, %esi
	orq	%rcx, %rsi
	andq	%r8, %rdx
	orq	%rsi, %rdx
	ret

We now compile this into:

_foo:                                   ## @foo
## BB#0:                                ## %entry
	movzwl	12(%rdi), %eax
	movzbl	14(%rdi), %ecx
	shlq	$16, %rcx
	orl	%eax, %ecx
	shlq	$32, %rcx
	movl	8(%rdi), %edx
	orq	%rcx, %rdx
	movq	(%rdi), %rax
	ret

A small improvement :-)

llvm-svn: 123520
2011-01-15 06:32:33 +00:00
Chris Lattner
089d215cb3 fix typo
llvm-svn: 123519
2011-01-15 06:27:35 +00:00
Chris Lattner
934c574ef9 Fix m_Not and m_Neg to not match random ConstantInt's. Before
these would try hard to match constants by inverting the bits
and recursively matching.  There are two problems with this:
1) some patterns would match when we didn't want them to (theoretical)
2) this is insanely expensive to do, and most often pointless.

This was apparently useful in just 2 instcombine cases, which I
added code to handle explicitly.  This change speeds up 'opt'
time on 176.gcc by 1% and produces bitwise identical code.

llvm-svn: 123518
2011-01-15 05:52:27 +00:00
Chris Lattner
0868c29c36 one more instcombine variant that is needed to work with future changes,
no functionality change currently.

llvm-svn: 123517
2011-01-15 05:50:18 +00:00
Chris Lattner
360fedf20a fix typo
llvm-svn: 123516
2011-01-15 05:42:47 +00:00
Chris Lattner
ca796e7838 Catch ~x < cst just like ~x < ~y, we currently handle this through
means that are about to disappear.

llvm-svn: 123515
2011-01-15 05:41:33 +00:00
Chris Lattner
06849c1228 reduce indentation
llvm-svn: 123514
2011-01-15 05:40:29 +00:00
Eric Christopher
d675e0b362 80-col.
llvm-svn: 123505
2011-01-15 00:25:09 +00:00
Chris Lattner
e6d5b3c4ce Generalize LoadAndStorePromoter a bit and switch LICM
to use it.

llvm-svn: 123501
2011-01-15 00:12:35 +00:00
Bob Wilson
e6b8ba1ae4 Fix a comment.
llvm-svn: 123497
2011-01-15 00:09:18 +00:00
Eric Christopher
b00cef51d8 Fix 80-cols.
llvm-svn: 123494
2011-01-14 23:50:53 +00:00
Ted Kremenek
c9d2425c5a Update CMake build.
llvm-svn: 123491
2011-01-14 22:58:11 +00:00
Bob Wilson
8ca3bc94ca Fix some tablegen issues to allow using zero_reg for InstAlias definitions.
This is needed to allow an InstAlias for an instruction with an "OptionalDef"
result register (like ARM's cc_out) where you want to set the optional register
to reg0.

llvm-svn: 123490
2011-01-14 22:58:09 +00:00
Ted Kremenek
a81e8e4f91 Fix memory leak found by clang static analyzer.
llvm-svn: 123487
2011-01-14 22:34:17 +00:00
Ted Kremenek
4b09cdedb2 'HiReg' is written but never read. Nuke its
declaration and its assignments.

Found by clang static analyzer.

llvm-svn: 123486
2011-01-14 22:34:13 +00:00
Owen Anderson
63902f2c99 Fix a false-positive warning.
llvm-svn: 123480
2011-01-14 22:31:13 +00:00
Dan Gohman
a4f2631ea9 Delete an assignment to ThisBB which isn't needed, and tidy up some
comments.

llvm-svn: 123479
2011-01-14 22:26:16 +00:00
Owen Anderson
51dcd56f96 Enhance GlobalOpt to be able evaluate initializers that involve stores through
bitcasts, at least in simple cases.  This fixes clang's CodeGenCXX/virtual-base-dtor.cpp

llvm-svn: 123477
2011-01-14 22:19:20 +00:00
Anton Korobeynikov
1f9df99db1 Add a possibility to switch between CFI directives- and table-based frame description emission. Currently all the backends use table-based stuff.
llvm-svn: 123476
2011-01-14 21:58:08 +00:00
Anton Korobeynikov
6b2f110a3d Cleanup
llvm-svn: 123475
2011-01-14 21:57:58 +00:00
Anton Korobeynikov
ef11a77938 Add CFI directives-based frame information emission. Not hooked yet.
llvm-svn: 123474
2011-01-14 21:57:53 +00:00
Anton Korobeynikov
e53322ef91 Split stuff as a preparation for CFI directives-based frame information emission
llvm-svn: 123473
2011-01-14 21:57:45 +00:00
Anton Korobeynikov
aeeec825cd Use common style for .cfi directives
llvm-svn: 123472
2011-01-14 21:57:39 +00:00
Andrew Trick
a0e69757d1 Support for precise scheduling of the instruction selection DAG,
disabled in this checkin. Sorry for the large diffs due to
refactoring. New functionality is all guarded by EnableSchedCycles.

Scheduling the isel DAG is inherently imprecise, but we give it a best
effort:
- Added MayReduceRegPressure to allow stalled nodes in the queue only
  if there is a regpressure need.
- Added BUHasStall to allow checking for either dependence stalls due to
  latency or resource stalls due to pipeline hazards.
- Added BUCompareLatency to encapsulate and standardize the heuristics
  for minimizing stall cycles (vs. reducing register pressure).
- Modified the bottom-up heuristic (now in BUCompareLatency) to
  prioritize nodes by their depth rather than height. As long as it
  doesn't stall, height is irrelevant. Depth represents the critical
  path to the DAG root.
- Added hybrid_ls_rr_sort::isReady to filter stalled nodes before
  adding them to the available queue.

Related Cleanup: most of the register reduction routines do not need
to be templates.

llvm-svn: 123468
2011-01-14 21:11:41 +00:00
Chris Lattner
1ce35a0362 switch SRoA to use LoadAndStorePromoter instead of its own copy of the code.
llvm-svn: 123457
2011-01-14 19:50:47 +00:00
Chris Lattner
2cf8e75d34 Add a new LoadAndStorePromoter class, which implements the general
"promote a bunch of load and stores" logic, allowing the code to
be shared and reused.

llvm-svn: 123456
2011-01-14 19:36:13 +00:00
Jay Foad
41923da581 OperandTraits<>::Layout isn't used for anything. Remove it.
llvm-svn: 123452
2011-01-14 18:41:56 +00:00
Rafael Espindola
70a277119f Update llvm-gcc's tests.
llvm-svn: 123447
2011-01-14 17:01:20 +00:00
Oscar Fuentes
2e491f8d47 Reorder macros on config.h.cmake to easily compare it against
config.h.in.

Patch by arrowdodger!

llvm-svn: 123445
2011-01-14 16:41:03 +00:00
Devang Patel
e9e4346064 Disable debug mode.
llvm-svn: 123443
2011-01-14 15:55:50 +00:00
Duncan Sands
dc51b0ee48 Turn X-(X-Y) into Y. According to my auto-simplifier this is the most common
simplification present in fully optimized code (I think instcombine fails to
transform some of these when "X-Y" has more than one use).  Fires here and
there all over the test-suite, for example it eliminates 8 subtractions in
the final IR for 445.gobmk, 2 subs in 447.dealII, 2 in paq8p etc.

llvm-svn: 123442
2011-01-14 15:26:10 +00:00
Duncan Sands
4757061c47 Factorize common code out of the InstructionSimplify shift logic. Add in
threading of shifts over selects and phis while there.  This fires here and
there in the testsuite, to not much effect.  For example when compiling spirit
it fires 5 times, during early-cse, resulting in 6 more cse simplifications,
and 3 more terminators being folded by jump threading, but the final bitcode
doesn't change in any interesting way: other optimizations would have caught
the opportunity anyway, only later.

llvm-svn: 123441
2011-01-14 14:44:12 +00:00
Duncan Sands
01be7e406d Rename this test.
llvm-svn: 123440
2011-01-14 14:16:33 +00:00
Chris Lattner
c0bd89331e switch the second scalarrepl pass to use SSAUpdater. We run two scalarrepl passes: one
early in the cleanup code and one late interlaced with the inliner.  The second one is
important because inlining and other scalar optzns can unpin allocas, allowing them to 
be split up and promoted.  While important for performance, this is also relatively
rare, and we would previously force a (non-lazy) computation of DomFrontiers, which 
happened even if nothing became unpinned.

With this patch, the first pass of scalarrepl still promotes the vast bulk of allocas
in programs, but hte second pass has changed to use SSAUpdater, which is more "sparse"
and lazy.  This speeds up opt -O3 time on kimwitu++ (a c++ app) by about 1%.  The
numbers are interesting: the first pass promotes ~17500 allocas.  The second pass
promotes about 1600.  For non-C++ codes, the compile time win should be greater, 
because the second pass of scalarrepl does less.

llvm-svn: 123437
2011-01-14 08:21:08 +00:00
Chris Lattner
8e171470d3 split SROA into two passes: one that uses DomFrontiers (-scalarrepl)
and one that uses SSAUpdater (-scalarrepl-ssa)

llvm-svn: 123436
2011-01-14 08:13:00 +00:00
Jay Foad
fa61721cf2 Remove casts between Value** and Constant**, which won't work if a
static_cast from Constant* to Value* has to adjust the "this" pointer.
This is groundwork for PR889.

llvm-svn: 123435
2011-01-14 08:07:43 +00:00
Chris Lattner
b5c39352d8 Implement full support for promoting allocas to registers using SSAUpdater
instead of DomTree/DomFrontier.  This may be interesting for reducing compile 
time.  This is currently disabled, but seems to work just fine.

When this is enabled, we eliminate two runs of dominator frontier, one in the
"early per-function" optimizations and one in the "interlaced with inliner"
function passes.

llvm-svn: 123434
2011-01-14 07:50:47 +00:00
Chris Lattner
de9ec03027 relax testcase a bit.
llvm-svn: 123433
2011-01-14 07:46:33 +00:00
Jakob Stoklund Olesen
9f5e00f957 Try for the third time to teach getFirstTerminator() about debug values.
This time let's rephrase to trick gcc-4.3 into not miscompiling.

llvm-svn: 123432
2011-01-14 06:33:45 +00:00
Chris Lattner
eba719204c revert my fastisel patch again which apparently still gives the
llvm-gcc-i386-linux-selfhost buildbot heartburn...

llvm-svn: 123431
2011-01-14 06:14:33 +00:00
Chris Lattner
ee950eeb24 reapply r123414 now that the botz are calmed down and the fix is already in.
llvm-svn: 123427
2011-01-14 04:24:28 +00:00
Chris Lattner
b1ba935526 indentation
llvm-svn: 123426
2011-01-14 04:23:53 +00:00
Evan Cheng
0cdd5547f1 Completed :lower16: / :upper16: support for movw / movt pairs on Darwin.
- Fixed :upper16: fix up routine. It should be shifting down the top 16 bits first.
- Added support for Thumb2 :lower16: and :upper16: fix up.
- Added :upper16: and :lower16: relocation support to mach-o object writer.

llvm-svn: 123424
2011-01-14 02:38:49 +00:00
Jakob Stoklund Olesen
99ad62ed9e Revert r123419. It still breaks llvm-gcc-i386-linux-selfhost.
llvm-svn: 123423
2011-01-14 02:12:54 +00:00
Chris Lattner
349735530b r123414 broke llvm-gcc bootstrap apparently, revert
llvm-svn: 123422
2011-01-14 02:07:32 +00:00
Chris Lattner
a0074ca5fc Set the insertion point correctly for instructions generated by load folding:
they should go *before* the new instruction not after it. 

llvm-svn: 123420
2011-01-14 01:33:40 +00:00
Jakob Stoklund Olesen
3d8deb13ee Try again to teach getFirstTerminator() about debug values.
Fix some callers to better deal with debug values.

llvm-svn: 123419
2011-01-14 01:17:53 +00:00
Owen Anderson
6de2a4d67c Rather than doing early instcombine, try doing early CSE instead. This should still handle
most important simplifications, as well as resolving phase ordering issues where instcombine
would inhibit important CSE'ing opportunities, for instance on BitBench/drop3.

llvm-svn: 123418
2011-01-14 00:41:11 +00:00
Duncan Sands
44c273d907 Move some shift transforms out of instcombine and into InstructionSimplify.
While there, I noticed that the transform "undef >>a X -> undef" was wrong.
For example if X is 2 then the top two bits must be equal, so the result can
not be anything.  I fixed this in the constant folder as well.  Also, I made
the transform for "X << undef" stronger: it now folds to undef always, even
though X might be zero.  This is in accordance with the LangRef, but I must
admit that it is fairly aggressive.  Also, I added "i32 X << 32 -> undef"
following the LangRef and the constant folder, likewise fairly aggressive.

llvm-svn: 123417
2011-01-14 00:37:45 +00:00
Owen Anderson
e9841116c0 Don't bother conditionalizing the use of SROA in -O1 mode. We're already running it unconditionally
later in the pipeline.

llvm-svn: 123416
2011-01-14 00:36:40 +00:00