Chris Lattner
578b634f56
implement an optimization to codegen c ? 1.0 : 2.0 as load { 2.0, 1.0 } + c*4.
...
For 2009-03-07-FPConstSelect.ll we now produce:
_f:
xorl %eax, %eax
testl %edi, %edi
movl $4, %ecx
cmovne %rax, %rcx
leaq LCPI1_0(%rip), %rax
movss (%rcx,%rax), %xmm0
ret
previously we produced:
_f:
subl $4, %esp
cmpl $0, 8(%esp)
movss LCPI1_0, %xmm0
je LBB1_2 ## entry
LBB1_1: ## entry
movss LCPI1_1, %xmm0
LBB1_2: ## entry
movss %xmm0, (%esp)
flds (%esp)
addl $4, %esp
ret
on PPC the code also improves to:
_f:
cntlzw r2, r3
srwi r2, r2, 5
li r3, lo16(LCPI1_0)
slwi r2, r2, 2
addis r3, r3, ha16(LCPI1_0)
lfsx f1, r3, r2
blr
from:
_f:
li r2, lo16(LCPI1_1)
cmplwi cr0, r3, 0
addis r2, r2, ha16(LCPI1_1)
beq cr0, LBB1_2 ; entry
LBB1_1: ; entry
li r2, lo16(LCPI1_0)
addis r2, r2, ha16(LCPI1_0)
LBB1_2: ; entry
lfs f1, 0(r2)
blr
This also improves the existing pic-cpool case from:
foo:
subl $12, %esp
call .Lllvm$1.$piclabel
.Lllvm$1.$piclabel:
popl %eax
addl $_GLOBAL_OFFSET_TABLE_ + [.-.Lllvm$1.$piclabel], %eax
cmpl $0, 16(%esp)
movsd .LCPI1_0@GOTOFF(%eax), %xmm0
je .LBB1_2 # entry
.LBB1_1: # entry
movsd .LCPI1_1@GOTOFF(%eax), %xmm0
.LBB1_2: # entry
movsd %xmm0, (%esp)
fldl (%esp)
addl $12, %esp
ret
to:
foo:
call .Lllvm$1.$piclabel
.Lllvm$1.$piclabel:
popl %eax
addl $_GLOBAL_OFFSET_TABLE_ + [.-.Lllvm$1.$piclabel], %eax
xorl %ecx, %ecx
cmpl $0, 4(%esp)
movl $8, %edx
cmovne %ecx, %edx
fldl .LCPI1_0@GOTOFF(%eax,%edx)
ret
This triggers a few dozen times in spec FP 2000.
llvm-svn: 66358
2009-03-08 01:51:30 +00:00