1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 20:23:11 +01:00

Add a note.

llvm-svn: 27827
This commit is contained in:
Chris Lattner 2006-04-19 05:53:27 +00:00
parent 411111435e
commit 99c7c3ad2f

View File

@ -996,3 +996,61 @@ be able eliminate one of the movaps:
movaps %xmm3, %xmm2
movaps %xmm4, %xmm3
jne LBB_main_4 # cond_true44
//===---------------------------------------------------------------------===//
Use the zeros that a movss load from memory places in the upper three elements
of the destination register (and the zeros produced by other instructions that
generate them) to build vectors more efficiently. Consider:
vector float test(float a) {
return (vector float){ 0.0, a, 0.0, 0.0};
}
We currently generate this as:
_test:
sub %ESP, 28
movss %XMM0, DWORD PTR [%ESP + 32]
movss DWORD PTR [%ESP + 4], %XMM0
mov DWORD PTR [%ESP + 12], 0
mov DWORD PTR [%ESP + 8], 0
mov DWORD PTR [%ESP], 0
movaps %XMM0, XMMWORD PTR [%ESP]
add %ESP, 28
ret
Something like this should be sufficient:
_test:
movss %XMM0, DWORD PTR [%ESP + 4]
shufps %XMM0, %XMM0, 81
ret
... which takes advantage of the zero elements provided by movss.
Even zeroing a register with xor and shufps'ing against it would be better
than the code we currently generate.
Likewise, for the following function we currently generate the code shown after it:
vector float test(float a, float b) {
return (vector float){ b, a, 0.0, 0.0};
}
_test:
pxor %XMM0, %XMM0
movss %XMM1, %XMM0
movss %XMM2, DWORD PTR [%ESP + 4]
unpcklps %XMM2, %XMM1
movss %XMM0, DWORD PTR [%ESP + 8]
unpcklps %XMM0, %XMM1
unpcklps %XMM0, %XMM2
ret
... here we do use pxor to materialize the zeros, but it would be better to
use the zeroed elements that movss already provides, turning this into
2 shufps's instead of 3 unpcklps's.
Another example: {0.0, 0.0, a, b }
//===---------------------------------------------------------------------===//