2010-03-24 21:49:50 +01:00
|
|
|
; RUN: llc < %s -march=x86 -mattr=+sse41 > %t
|
Fix the buildvector->insertp[sd] logic so that it does not always create a
redundant insertp[sd] $0, which is a no-op. Before:
_f32: ## @f32
pshufd $1, %xmm1, %xmm2
pshufd $1, %xmm0, %xmm3
addss %xmm2, %xmm3
addss %xmm1, %xmm0
## kill: XMM0<def> XMM0<kill> XMM0<def>
insertps $0, %xmm0, %xmm0
insertps $16, %xmm3, %xmm0
ret
After:
_f32: ## @f32
movdqa %xmm0, %xmm2
addss %xmm1, %xmm2
pshufd $1, %xmm1, %xmm1
pshufd $1, %xmm0, %xmm3
addss %xmm1, %xmm3
movdqa %xmm2, %xmm0
insertps $16, %xmm3, %xmm0
ret
The extra movs are due to a random (poor) scheduling decision.
llvm-svn: 112379
2010-08-28 19:59:08 +02:00
|
|
|
; RUN: grep pinsrd %t | count 1
|
2010-03-24 21:49:50 +01:00
|
|
|
|
|
|
|
; Builds a <4 x i32> starting from undef: %val is placed in lane 0 and %idx in
; lane 3; lanes 1 and 2 are never written. Both insert positions are
; compile-time constants, and the incoming vector %x is not used.
define <4 x i32> @var_insert2(<4 x i32> %x, i32 %val, i32 %idx) nounwind {
entry:
  ; Lane 0 <- %val.
  %lane0 = insertelement <4 x i32> undef, i32 %val, i32 0
  ; Lane 3 <- %idx (used here as a data value, not as an index).
  %result = insertelement <4 x i32> %lane0, i32 %idx, i32 3
  ret <4 x i32> %result
}
|