1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-20 19:42:54 +02:00
llvm-mirror/test/CodeGen/X86/vec_insert-9.ll
Chris Lattner 8cb4abbc0e fix the buildvector->insertp[sd] logic to not always create a redundant
insertp[sd] $0, which is a noop.  Before:

_f32:                                   ## @f32
	pshufd	$1, %xmm1, %xmm2
	pshufd	$1, %xmm0, %xmm3
	addss	%xmm2, %xmm3
	addss	%xmm1, %xmm0
                                        ## kill: XMM0<def> XMM0<kill> XMM0<def>
	insertps	$0, %xmm0, %xmm0
	insertps	$16, %xmm3, %xmm0
	ret

after:

_f32:                                   ## @f32
	movdqa	%xmm0, %xmm2
	addss	%xmm1, %xmm2
	pshufd	$1, %xmm1, %xmm1
	pshufd	$1, %xmm0, %xmm3
	addss	%xmm1, %xmm3
	movdqa	%xmm2, %xmm0
	insertps	$16, %xmm3, %xmm0
	ret

The extra movs are due to a random (poor) scheduling decision.

llvm-svn: 112379
2010-08-28 17:59:08 +00:00

10 lines
347 B
LLVM

; RUN: llc < %s -march=x86 -mattr=+sse41 > %t
; RUN: grep pinsrd %t | count 1
define <4 x i32> @var_insert2(<4 x i32> %x, i32 %val, i32 %idx) nounwind {
entry:
%tmp3 = insertelement <4 x i32> undef, i32 %val, i32 0 ; <<4 x i32>> [#uses=1]
%tmp4 = insertelement <4 x i32> %tmp3, i32 %idx, i32 3 ; <<4 x i32>> [#uses=1]
ret <4 x i32> %tmp4
}