From 99c7c3ad2f9145471801e77ef4418bce12591ef1 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Wed, 19 Apr 2006 05:53:27 +0000 Subject: [PATCH] Add a note. llvm-svn: 27827 --- lib/Target/X86/README.txt | 58 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index 67bc04ff71a..6dfa084bc44 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -996,3 +996,61 @@ be able eliminate one of the movaps: movaps %xmm3, %xmm2 movaps %xmm4, %xmm3 jne LBB_main_4 # cond_true44 + +//===---------------------------------------------------------------------===// + +Use the 0's in the top part of movss from memory (and from other instructions +that generate them) to build vectors more efficiently. Consider: + +vector float test(float a) { + return (vector float){ 0.0, a, 0.0, 0.0}; +} + +We currently generate this as: + +_test: + sub %ESP, 28 + movss %XMM0, DWORD PTR [%ESP + 32] + movss DWORD PTR [%ESP + 4], %XMM0 + mov DWORD PTR [%ESP + 12], 0 + mov DWORD PTR [%ESP + 8], 0 + mov DWORD PTR [%ESP], 0 + movaps %XMM0, XMMWORD PTR [%ESP] + add %ESP, 28 + ret + +Something like this should be sufficient: + +_test: + movss %XMM0, DWORD PTR [%ESP + 4] + shufps %XMM0, %XMM0, 81 + ret + +... which takes advantage of the zero elements provided by movss. +Even xoring a register and shufps'ing IT would be better than the +above code. + +Likewise, for this: + +vector float test(float a, float b) { + return (vector float){ b, a, 0.0, 0.0}; +} + +_test: + pxor %XMM0, %XMM0 + movss %XMM1, %XMM0 + movss %XMM2, DWORD PTR [%ESP + 4] + unpcklps %XMM2, %XMM1 + movss %XMM0, DWORD PTR [%ESP + 8] + unpcklps %XMM0, %XMM1 + unpcklps %XMM0, %XMM2 + ret + +... where we do use pxor, it would be better to use the zero'd +elements that movss provides to turn this into 2 shufps's instead +of 3 unpcklps's. + +Another example: {0.0, 0.0, a, b } + +//===---------------------------------------------------------------------===// +