2007-03-22 19:42:45 +01:00
|
|
|
//===---------------------------------------------------------------------===//
|
|
|
|
// Random ideas for the X86 backend: MMX-specific stuff.
|
|
|
|
//===---------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
//===---------------------------------------------------------------------===//
|
2007-04-24 23:20:03 +02:00
|
|
|
|
|
|
|
This:
|
|
|
|
|
|
|
|
#include <mmintrin.h>
|
|
|
|
|
|
|
|
__v2si qux(int A) {
|
|
|
|
return (__v2si){ 0, A };
|
|
|
|
}
|
|
|
|
|
|
|
|
is compiled into:
|
|
|
|
|
|
|
|
_qux:
|
|
|
|
subl $28, %esp
|
|
|
|
movl 32(%esp), %eax
|
|
|
|
movd %eax, %mm0
|
|
|
|
movq %mm0, (%esp)
|
|
|
|
movl (%esp), %eax
|
|
|
|
movl %eax, 20(%esp)
|
|
|
|
movq %mm0, 8(%esp)
|
|
|
|
movl 12(%esp), %eax
|
|
|
|
movl %eax, 16(%esp)
|
|
|
|
movq 16(%esp), %mm0
|
|
|
|
addl $28, %esp
|
|
|
|
ret
|
|
|
|
|
|
|
|
Yuck!
|
|
|
|
|
|
|
|
GCC gives us:
|
|
|
|
|
|
|
|
_qux:
|
|
|
|
subl $12, %esp
|
|
|
|
movl 16(%esp), %eax
|
|
|
|
movl 20(%esp), %edx
|
|
|
|
movl $0, (%eax)
|
|
|
|
movl %edx, 4(%eax)
|
|
|
|
addl $12, %esp
|
|
|
|
ret $4
|
2007-05-02 23:42:20 +02:00
|
|
|
|
2009-02-22 09:13:45 +01:00
|
|
|
//===---------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
We generate crappy code for this:
|
|
|
|
|
|
|
|
__m64 t() {
|
|
|
|
return _mm_cvtsi32_si64(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
_t:
|
|
|
|
subl $12, %esp
|
|
|
|
movl $1, %eax
|
|
|
|
movd %eax, %mm0
|
|
|
|
movq %mm0, (%esp)
|
|
|
|
movl (%esp), %eax
|
|
|
|
movl 4(%esp), %edx
|
|
|
|
addl $12, %esp
|
|
|
|
ret
|
|
|
|
|
|
|
|
The extra stack traffic is covered in the previous entry. But the other reason
|
|
|
|
is that we are not smart about materializing constants in MMX registers. With -m64 we get:
|
|
|
|
|
|
|
|
movl $1, %eax
|
|
|
|
movd %eax, %mm0
|
|
|
|
movd %mm0, %rax
|
|
|
|
ret
|
|
|
|
|
|
|
|
We should be using a constant pool load instead:
|
|
|
|
movq LC0(%rip), %rax
|