llvm-mirror/test/CodeGen/X86/v2f32.ll

; Codegen checks for <2 x float> operations on three x86 configurations.
; Each llc invocation below pipes its assembly into FileCheck under its own
; check prefix, so every function in this file carries three independent sets
; of expectations:
;   - x86_64-linux,   -mcpu=penryn
;   - x86_64-win32,   -mcpu=penryn
;   - i386-linux-gnu, -mcpu=yonah
; NOTE(review): -asm-verbose=0 is presumably passed so that assembler comments
; in the output do not get in the way of the consecutive-line checks -- confirm.
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn -asm-verbose=0 -o - | FileCheck %s -check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=penryn -asm-verbose=0 -o - | FileCheck %s -check-prefix=W64
; RUN: llc < %s -mcpu=yonah -march=x86 -mtriple=i386-linux-gnu -asm-verbose=0 -o - | FileCheck %s -check-prefix=X32

; PR7518
; test1 adds the two lanes of a <2 x float> argument and stores the scalar
; sum through the pointer %P2.
;
; For every target the expected code has the same shape: pshufd $1 copies
; lane 1 of the input into the low lane of a second register, addss adds the
; two low lanes, and movss stores the result through the pointer. The targets
; differ only in where the checks expect the arguments to live.
define void @test1(<2 x float> %Q, float *%P2) nounwind {
  ; Read lane 0 and lane 1 of the incoming vector.
  %a = extractelement <2 x float> %Q, i32 0
  %b = extractelement <2 x float> %Q, i32 1
  ; Scalar add of the two lanes.
  %c = fadd float %a, %b

  store float %c, float* %P2
  ret void
; x86-64 Linux expectations: vector in xmm0, pointer in rdi.
; X64: test1:
; X64-NEXT: pshufd	$1, %xmm0, %xmm1
; X64-NEXT: addss	%xmm0, %xmm1
; X64-NEXT: movss	%xmm1, (%rdi)
; X64-NEXT: ret

; Win64 expectations: the vector is first loaded from the address in rcx,
; and the pointer argument is in rdx.
; W64: test1:
; W64-NEXT: movdqa  (%rcx), %xmm0
; W64-NEXT: pshufd  $1, %xmm0, %xmm1
; W64-NEXT: addss   %xmm0, %xmm1
; W64-NEXT: movss   %xmm1, (%rdx)
; W64-NEXT: ret

; 32-bit expectations: vector in xmm0, pointer loaded from 4(%esp).
; X32: test1:
; X32-NEXT: pshufd	$1, %xmm0, %xmm1
; X32-NEXT: addss	%xmm0, %xmm1
; X32-NEXT: movl	4(%esp), %eax
; X32-NEXT: movss	%xmm1, (%eax)
; X32-NEXT: ret
}


; test2 returns the element-wise sum of two <2 x float> arguments.
; The pointer argument %P is declared but never used in the body.
;
; The expected code is a single packed add (addps) with no shuffles.
define <2 x float> @test2(<2 x float> %Q, <2 x float> %R, <2 x float> *%P) nounwind {
  ; Element-wise vector add; the result is returned directly.
  %Z = fadd <2 x float> %Q, %R
  ret <2 x float> %Z
  
; x86-64 Linux expectations: operands in xmm0 and xmm1, result in xmm0.
; X64: test2:
; X64-NEXT: addps	%xmm1, %xmm0
; X64-NEXT: ret

; Win64 expectations: the first operand is loaded from the address in rcx and
; the second is used as a memory operand at the address in rdx.
; W64: test2:
; W64-NEXT: movaps  (%rcx), %xmm0
; W64-NEXT: addps   (%rdx), %xmm0
; W64-NEXT: ret

; 32-bit expectations: looser than the other targets. The addps only has to
; appear somewhere after the label (plain check, not a next-line check), and
; the return instruction is not checked.
; X32: test2:
; X32:      addps	%xmm1, %xmm0
}


; test3 takes the low two lanes of a <4 x float>, doubles them by adding the
; resulting <2 x float> to itself, and returns the <2 x float>.
;
; The checks expect a single addps of xmm0 with itself; no extra instruction
; is expected for the lane extraction.
define <2 x float> @test3(<4 x float> %A) nounwind {
	; Select lanes 0 and 1 of %A (the second shuffle operand is undef).
	%B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
	; Add the narrowed vector to itself.
	%C = fadd <2 x float> %B, %B
	ret <2 x float> %C
; x86-64 Linux expectations.
; X64: test3:
; X64-NEXT: addps	%xmm0, %xmm0
; X64-NEXT: ret

; Win64 expectations: the argument is first loaded from the address in rcx.
; W64: test3:
; W64-NEXT: movaps  (%rcx), %xmm0
; W64-NEXT: addps   %xmm0, %xmm0
; W64-NEXT: ret

; 32-bit expectations.
; X32: test3:
; X32-NEXT: addps	%xmm0, %xmm0
; X32-NEXT: ret
}

; test4 doubles a <2 x float> argument by adding it to itself and returns the
; result.
;
; The checks expect a single addps of xmm0 with itself on every target.
define <2 x float> @test4(<2 x float> %A) nounwind {
	; Element-wise add of the argument with itself.
	%C = fadd <2 x float> %A, %A
	ret <2 x float> %C
; x86-64 Linux expectations.
; X64: test4:
; X64-NEXT: addps	%xmm0, %xmm0
; X64-NEXT: ret

; Win64 expectations: the argument is first loaded from the address in rcx.
; W64: test4:
; W64-NEXT: movaps  (%rcx), %xmm0
; W64-NEXT: addps   %xmm0, %xmm0
; W64-NEXT: ret

; 32-bit expectations.
; X32: test4:
; X32-NEXT: addps	%xmm0, %xmm0
; X32-NEXT: ret
}

; test5 exercises a <2 x float> value that is live across a basic-block
; boundary: %C is computed in the entry block and consumed in block BB.
;
; The low two lanes of %A are doubled twice (once per block), then the result
; is widened back to <4 x float> with the upper two lanes left undef.
; The checks expect exactly two back-to-back addps instructions, with no
; shuffle or insert instructions between them.
define <4 x float> @test5(<4 x float> %A) nounwind {
	; Select lanes 0 and 1 of %A (the second shuffle operand is undef).
	%B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
	; First doubling, in the entry block.
	%C = fadd <2 x float> %B, %B
        br label %BB
        
BB:
        ; Second doubling; uses %C, which was defined in the previous block.
        %D = fadd <2 x float> %C, %C
	; Widen to four lanes: lanes 0 and 1 come from %D, lanes 2 and 3 are undef.
	%E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
	ret <4 x float> %E
        
; x86-64 Linux expectations.
; X64: test5:
; X64-NEXT: addps	%xmm0, %xmm0
; X64-NEXT: addps	%xmm0, %xmm0
; X64-NEXT: ret

; Win64 expectations: the argument is first loaded from the address in rcx.
; W64: test5:
; W64-NEXT: movaps  (%rcx), %xmm0
; W64-NEXT: addps   %xmm0, %xmm0
; W64-NEXT: addps   %xmm0, %xmm0
; W64-NEXT: ret

; 32-bit expectations.
; X32: test5:
; X32-NEXT: addps	%xmm0, %xmm0
; X32-NEXT: addps	%xmm0, %xmm0
; X32-NEXT: ret
}
test/CodeGen/X86: Add a pattern for Win64. llvm-svn: 127733 2011-03-16 14:52:51 +01:00			`; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn -asm-verbose=0 -o - \| FileCheck %s -check-prefix=X64`
			`; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=penryn -asm-verbose=0 -o - \| FileCheck %s -check-prefix=W64`
Attempt to fix -mtriple=i686-{cygwin\|mingw\|win32} regressions. Nakamura, if this doesn't work, please provide more details. llvm-svn: 140107 2011-09-20 02:08:12 +02:00			`; RUN: llc < %s -mcpu=yonah -march=x86 -mtriple=i386-linux-gnu -asm-verbose=0 -o - \| FileCheck %s -check-prefix=X32`
fix PR7518 - terrible codegen of <2 x float>, by only marking v2f32 as legal in 32-bit mode. It is just as terrible there, but I just care about x86-64 and noone claims it is valuable in 64-bit mode. llvm-svn: 107600 2010-07-05 00:57:10 +02:00
			`; PR7518`
			`define void @test1(<2 x float> %Q, float *%P2) nounwind {`
			`%a = extractelement <2 x float> %Q, i32 0`
			`%b = extractelement <2 x float> %Q, i32 1`
			`%c = fadd float %a, %b`

			`store float %c, float* %P2`
			`ret void`
Just rip v2f32 support completely out of the X86 backend. In the example in the testcase, we now generate: _test1: ## @test1 movss 4(%esp), %xmm0 addss 8(%esp), %xmm0 movl 12(%esp), %eax movss %xmm0, (%eax) ret instead of: _test1: ## @test1 subl $20, %esp movl 24(%esp), %eax movq %mm0, (%esp) movq %mm0, 8(%esp) movss (%esp), %xmm0 addss 12(%esp), %xmm0 movss %xmm0, (%eax) addl $20, %esp ret v2f32 support did not work reliably because most of the X86 backend didn't know it was legal. It was apparently only added to support returning source-level v2f32 values in MMX registers in x86-32 mode. If ABI compatibility is important on this GCC-extended-vector type for some reason, then the frontend should generate IR that returns v2i32 instead of v2f32. However, we generally don't try very hard to be abi compatible on gcc extended vectors. llvm-svn: 107601 2010-07-05 01:07:25 +02:00			`; X64: test1:`
Change handling of illegal vector types to widen when possible instead of expanding: e.g. <2 x float> -> <4 x float> instead of -> 2 floats. This affects two places in the code: handling cross block values and handling function return and arguments. Since vectors are already widened by legalizetypes, this gives us much better code and unblocks x86-64 abi and SPU abi work. For example, this (which is a silly example of a cross-block value): define <4 x float> @test2(<4 x float> %A) nounwind { %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1> %C = fadd <2 x float> %B, %B br label %BB BB: %D = fadd <2 x float> %C, %C %E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> ret <4 x float> %E } Now compiles into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 addps %xmm0, %xmm0 ret previously it compiled into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 pshufd $1, %xmm0, %xmm1 ## kill: XMM0<def> XMM0<kill> XMM0<def> insertps $0, %xmm0, %xmm0 insertps $16, %xmm1, %xmm0 addps %xmm0, %xmm0 ret This implements rdar://8230384 llvm-svn: 112101 2010-08-26 00:49:25 +02:00			`; X64-NEXT: pshufd $1, %xmm0, %xmm1`
			`; X64-NEXT: addss %xmm0, %xmm1`
			`; X64-NEXT: movss %xmm1, (%rdi)`
Just rip v2f32 support completely out of the X86 backend. In the example in the testcase, we now generate: _test1: ## @test1 movss 4(%esp), %xmm0 addss 8(%esp), %xmm0 movl 12(%esp), %eax movss %xmm0, (%eax) ret instead of: _test1: ## @test1 subl $20, %esp movl 24(%esp), %eax movq %mm0, (%esp) movq %mm0, 8(%esp) movss (%esp), %xmm0 addss 12(%esp), %xmm0 movss %xmm0, (%eax) addl $20, %esp ret v2f32 support did not work reliably because most of the X86 backend didn't know it was legal. It was apparently only added to support returning source-level v2f32 values in MMX registers in x86-32 mode. If ABI compatibility is important on this GCC-extended-vector type for some reason, then the frontend should generate IR that returns v2i32 instead of v2f32. However, we generally don't try very hard to be abi compatible on gcc extended vectors. llvm-svn: 107601 2010-07-05 01:07:25 +02:00			`; X64-NEXT: ret`

test/CodeGen/X86: Add a pattern for Win64. llvm-svn: 127733 2011-03-16 14:52:51 +01:00			`; W64: test1:`
			`; W64-NEXT: movdqa (%rcx), %xmm0`
			`; W64-NEXT: pshufd $1, %xmm0, %xmm1`
			`; W64-NEXT: addss %xmm0, %xmm1`
			`; W64-NEXT: movss %xmm1, (%rdx)`
			`; W64-NEXT: ret`

Just rip v2f32 support completely out of the X86 backend. In the example in the testcase, we now generate: _test1: ## @test1 movss 4(%esp), %xmm0 addss 8(%esp), %xmm0 movl 12(%esp), %eax movss %xmm0, (%eax) ret instead of: _test1: ## @test1 subl $20, %esp movl 24(%esp), %eax movq %mm0, (%esp) movq %mm0, 8(%esp) movss (%esp), %xmm0 addss 12(%esp), %xmm0 movss %xmm0, (%eax) addl $20, %esp ret v2f32 support did not work reliably because most of the X86 backend didn't know it was legal. It was apparently only added to support returning source-level v2f32 values in MMX registers in x86-32 mode. If ABI compatibility is important on this GCC-extended-vector type for some reason, then the frontend should generate IR that returns v2i32 instead of v2f32. However, we generally don't try very hard to be abi compatible on gcc extended vectors. llvm-svn: 107601 2010-07-05 01:07:25 +02:00			`; X32: test1:`
Change handling of illegal vector types to widen when possible instead of expanding: e.g. <2 x float> -> <4 x float> instead of -> 2 floats. This affects two places in the code: handling cross block values and handling function return and arguments. Since vectors are already widened by legalizetypes, this gives us much better code and unblocks x86-64 abi and SPU abi work. For example, this (which is a silly example of a cross-block value): define <4 x float> @test2(<4 x float> %A) nounwind { %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1> %C = fadd <2 x float> %B, %B br label %BB BB: %D = fadd <2 x float> %C, %C %E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> ret <4 x float> %E } Now compiles into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 addps %xmm0, %xmm0 ret previously it compiled into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 pshufd $1, %xmm0, %xmm1 ## kill: XMM0<def> XMM0<kill> XMM0<def> insertps $0, %xmm0, %xmm0 insertps $16, %xmm1, %xmm0 addps %xmm0, %xmm0 ret This implements rdar://8230384 llvm-svn: 112101 2010-08-26 00:49:25 +02:00			`; X32-NEXT: pshufd $1, %xmm0, %xmm1`
			`; X32-NEXT: addss %xmm0, %xmm1`
			`; X32-NEXT: movl 4(%esp), %eax`
			`; X32-NEXT: movss %xmm1, (%eax)`
Just rip v2f32 support completely out of the X86 backend. In the example in the testcase, we now generate: _test1: ## @test1 movss 4(%esp), %xmm0 addss 8(%esp), %xmm0 movl 12(%esp), %eax movss %xmm0, (%eax) ret instead of: _test1: ## @test1 subl $20, %esp movl 24(%esp), %eax movq %mm0, (%esp) movq %mm0, 8(%esp) movss (%esp), %xmm0 addss 12(%esp), %xmm0 movss %xmm0, (%eax) addl $20, %esp ret v2f32 support did not work reliably because most of the X86 backend didn't know it was legal. It was apparently only added to support returning source-level v2f32 values in MMX registers in x86-32 mode. If ABI compatibility is important on this GCC-extended-vector type for some reason, then the frontend should generate IR that returns v2i32 instead of v2f32. However, we generally don't try very hard to be abi compatible on gcc extended vectors. llvm-svn: 107601 2010-07-05 01:07:25 +02:00			`; X32-NEXT: ret`
fix PR7518 - terrible codegen of <2 x float>, by only marking v2f32 as legal in 32-bit mode. It is just as terrible there, but I just care about x86-64 and noone claims it is valuable in 64-bit mode. llvm-svn: 107600 2010-07-05 00:57:10 +02:00			`}`

another v2f32 case, in this case showing poor codegen. llvm-svn: 107614 2010-07-05 07:52:56 +02:00
			`define <2 x float> @test2(<2 x float> %Q, <2 x float> %R, <2 x float> *%P) nounwind {`
			`%Z = fadd <2 x float> %Q, %R`
			`ret <2 x float> %Z`

			`; X64: test2:`
Change handling of illegal vector types to widen when possible instead of expanding: e.g. <2 x float> -> <4 x float> instead of -> 2 floats. This affects two places in the code: handling cross block values and handling function return and arguments. Since vectors are already widened by legalizetypes, this gives us much better code and unblocks x86-64 abi and SPU abi work. For example, this (which is a silly example of a cross-block value): define <4 x float> @test2(<4 x float> %A) nounwind { %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1> %C = fadd <2 x float> %B, %B br label %BB BB: %D = fadd <2 x float> %C, %C %E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> ret <4 x float> %E } Now compiles into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 addps %xmm0, %xmm0 ret previously it compiled into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 pshufd $1, %xmm0, %xmm1 ## kill: XMM0<def> XMM0<kill> XMM0<def> insertps $0, %xmm0, %xmm0 insertps $16, %xmm1, %xmm0 addps %xmm0, %xmm0 ret This implements rdar://8230384 llvm-svn: 112101 2010-08-26 00:49:25 +02:00			`; X64-NEXT: addps %xmm1, %xmm0`
another v2f32 case, in this case showing poor codegen. llvm-svn: 107614 2010-07-05 07:52:56 +02:00			`; X64-NEXT: ret`
test/CodeGen/X86: Add a pattern for Win64. llvm-svn: 127733 2011-03-16 14:52:51 +01:00
			`; W64: test2:`
			`; W64-NEXT: movaps (%rcx), %xmm0`
			`; W64-NEXT: addps (%rdx), %xmm0`
			`; W64-NEXT: ret`

			`; X32: test2:`
			`; X32: addps %xmm1, %xmm0`
another v2f32 case, in this case showing poor codegen. llvm-svn: 107614 2010-07-05 07:52:56 +02:00			`}`
Change handling of illegal vector types to widen when possible instead of expanding: e.g. <2 x float> -> <4 x float> instead of -> 2 floats. This affects two places in the code: handling cross block values and handling function return and arguments. Since vectors are already widened by legalizetypes, this gives us much better code and unblocks x86-64 abi and SPU abi work. For example, this (which is a silly example of a cross-block value): define <4 x float> @test2(<4 x float> %A) nounwind { %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1> %C = fadd <2 x float> %B, %B br label %BB BB: %D = fadd <2 x float> %C, %C %E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> ret <4 x float> %E } Now compiles into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 addps %xmm0, %xmm0 ret previously it compiled into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 pshufd $1, %xmm0, %xmm1 ## kill: XMM0<def> XMM0<kill> XMM0<def> insertps $0, %xmm0, %xmm0 insertps $16, %xmm1, %xmm0 addps %xmm0, %xmm0 ret This implements rdar://8230384 llvm-svn: 112101 2010-08-26 00:49:25 +02:00

			`define <2 x float> @test3(<4 x float> %A) nounwind {`
			`%B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>`
			`%C = fadd <2 x float> %B, %B`
			`ret <2 x float> %C`
test/CodeGen/X86: Add a pattern for Win64. llvm-svn: 127733 2011-03-16 14:52:51 +01:00			`; X64: test3:`
			`; X64-NEXT: addps %xmm0, %xmm0`
			`; X64-NEXT: ret`

			`; W64: test3:`
			`; W64-NEXT: movaps (%rcx), %xmm0`
			`; W64-NEXT: addps %xmm0, %xmm0`
			`; W64-NEXT: ret`

			`; X32: test3:`
			`; X32-NEXT: addps %xmm0, %xmm0`
			`; X32-NEXT: ret`
Change handling of illegal vector types to widen when possible instead of expanding: e.g. <2 x float> -> <4 x float> instead of -> 2 floats. This affects two places in the code: handling cross block values and handling function return and arguments. Since vectors are already widened by legalizetypes, this gives us much better code and unblocks x86-64 abi and SPU abi work. For example, this (which is a silly example of a cross-block value): define <4 x float> @test2(<4 x float> %A) nounwind { %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1> %C = fadd <2 x float> %B, %B br label %BB BB: %D = fadd <2 x float> %C, %C %E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> ret <4 x float> %E } Now compiles into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 addps %xmm0, %xmm0 ret previously it compiled into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 pshufd $1, %xmm0, %xmm1 ## kill: XMM0<def> XMM0<kill> XMM0<def> insertps $0, %xmm0, %xmm0 insertps $16, %xmm1, %xmm0 addps %xmm0, %xmm0 ret This implements rdar://8230384 llvm-svn: 112101 2010-08-26 00:49:25 +02:00			`}`

			`define <2 x float> @test4(<2 x float> %A) nounwind {`
			`%C = fadd <2 x float> %A, %A`
			`ret <2 x float> %C`
test/CodeGen/X86: Add a pattern for Win64. llvm-svn: 127733 2011-03-16 14:52:51 +01:00			`; X64: test4:`
			`; X64-NEXT: addps %xmm0, %xmm0`
			`; X64-NEXT: ret`

			`; W64: test4:`
			`; W64-NEXT: movaps (%rcx), %xmm0`
			`; W64-NEXT: addps %xmm0, %xmm0`
			`; W64-NEXT: ret`

			`; X32: test4:`
			`; X32-NEXT: addps %xmm0, %xmm0`
			`; X32-NEXT: ret`
Change handling of illegal vector types to widen when possible instead of expanding: e.g. <2 x float> -> <4 x float> instead of -> 2 floats. This affects two places in the code: handling cross block values and handling function return and arguments. Since vectors are already widened by legalizetypes, this gives us much better code and unblocks x86-64 abi and SPU abi work. For example, this (which is a silly example of a cross-block value): define <4 x float> @test2(<4 x float> %A) nounwind { %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1> %C = fadd <2 x float> %B, %B br label %BB BB: %D = fadd <2 x float> %C, %C %E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> ret <4 x float> %E } Now compiles into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 addps %xmm0, %xmm0 ret previously it compiled into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 pshufd $1, %xmm0, %xmm1 ## kill: XMM0<def> XMM0<kill> XMM0<def> insertps $0, %xmm0, %xmm0 insertps $16, %xmm1, %xmm0 addps %xmm0, %xmm0 ret This implements rdar://8230384 llvm-svn: 112101 2010-08-26 00:49:25 +02:00			`}`

			`define <4 x float> @test5(<4 x float> %A) nounwind {`
			`%B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>`
			`%C = fadd <2 x float> %B, %B`
			`br label %BB`

			`BB:`
			`%D = fadd <2 x float> %C, %C`
			`%E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>`
			`ret <4 x float> %E`

test/CodeGen/X86: Add a pattern for Win64. llvm-svn: 127733 2011-03-16 14:52:51 +01:00			`; X64: test5:`
			`; X64-NEXT: addps %xmm0, %xmm0`
			`; X64-NEXT: addps %xmm0, %xmm0`
			`; X64-NEXT: ret`

			`; W64: test5:`
			`; W64-NEXT: movaps (%rcx), %xmm0`
			`; W64-NEXT: addps %xmm0, %xmm0`
			`; W64-NEXT: addps %xmm0, %xmm0`
			`; W64-NEXT: ret`

			`; X32: test5:`
			`; X32-NEXT: addps %xmm0, %xmm0`
			`; X32-NEXT: addps %xmm0, %xmm0`
			`; X32-NEXT: ret`
Change handling of illegal vector types to widen when possible instead of expanding: e.g. <2 x float> -> <4 x float> instead of -> 2 floats. This affects two places in the code: handling cross block values and handling function return and arguments. Since vectors are already widened by legalizetypes, this gives us much better code and unblocks x86-64 abi and SPU abi work. For example, this (which is a silly example of a cross-block value): define <4 x float> @test2(<4 x float> %A) nounwind { %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1> %C = fadd <2 x float> %B, %B br label %BB BB: %D = fadd <2 x float> %C, %C %E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> ret <4 x float> %E } Now compiles into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 addps %xmm0, %xmm0 ret previously it compiled into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 pshufd $1, %xmm0, %xmm1 ## kill: XMM0<def> XMM0<kill> XMM0<def> insertps $0, %xmm0, %xmm0 insertps $16, %xmm1, %xmm0 addps %xmm0, %xmm0 ret This implements rdar://8230384 llvm-svn: 112101 2010-08-26 00:49:25 +02:00			`}`