mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-22 04:22:57 +02:00
69a9143584
… by the SRoA "promote to large integer" code, eliminating some type conversions like this: `%94 = zext i16 %93 to i32 ; <i32> [#uses=2]` `%96 = lshr i32 %94, 8 ; <i32> [#uses=1]` `%101 = trunc i32 %96 to i8 ; <i8> [#uses=1]`. This also unblocks other xforms from happening; now clang is able to compile: `struct S { float A, B, C, D; };` `float foo(struct S A) { return A.A + A.B+A.C+A.D; }` into: `_foo: ## @foo` `## BB#0: ## %entry` `pshufd $1, %xmm0, %xmm2` `addss %xmm0, %xmm2` `movdqa %xmm1, %xmm3` `addss %xmm2, %xmm3` `pshufd $1, %xmm1, %xmm0` `addss %xmm3, %xmm0` `ret` on x86-64, instead of: `_foo: ## @foo` `## BB#0: ## %entry` `movd %xmm0, %rax` `shrq $32, %rax` `movd %eax, %xmm2` `addss %xmm0, %xmm2` `movapd %xmm1, %xmm3` `addss %xmm2, %xmm3` `movd %xmm1, %rax` `shrq $32, %rax` `movd %eax, %xmm0` `addss %xmm3, %xmm0` `ret`. This seems pretty close to optimal to me, at least without using horizontal adds. This also triggers in lots of other code, including SPEC. llvm-svn: 112278 |
||
---|---|---|
.. | ||
ABCD | ||
ADCE | ||
ArgumentPromotion | ||
BlockPlacement | ||
BranchFolding | ||
CodeExtractor | ||
CodeGenPrepare | ||
ConstantMerge | ||
ConstProp | ||
DeadArgElim | ||
DeadStoreElimination | ||
FunctionAttrs | ||
GlobalDCE | ||
GlobalOpt | ||
GVN | ||
IndVarSimplify | ||
Inline | ||
InstCombine | ||
Internalize | ||
IPConstantProp | ||
JumpThreading | ||
LCSSA | ||
LICM | ||
LoopDeletion | ||
LoopIndexSplit | ||
LoopRotate | ||
LoopSimplify | ||
LoopStrengthReduce | ||
LoopUnroll | ||
LoopUnswitch | ||
LowerAtomic | ||
LowerInvoke | ||
LowerSetJmp | ||
LowerSwitch | ||
Mem2Reg | ||
MemCpyOpt | ||
MergeFunc | ||
PartialSpecialize | ||
PruneEH | ||
Reassociate | ||
ScalarRepl | ||
SCCP | ||
SimplifyCFG | ||
SimplifyLibCalls | ||
Sink | ||
SRETPromotion | ||
SSI | ||
StripSymbols | ||
TailCallElim | ||
TailDup |