1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 03:33:20 +01:00

Certain patterns involving the "movss" instruction were marked as requiring SSE2, when in reality movss is an SSE1 instruction.

llvm-svn: 57246
This commit is contained in:
Anders Carlsson 2008-10-07 16:14:11 +00:00
parent 7127e22321
commit a9c42526f8
2 changed files with 25 additions and 3 deletions

View File

@ -2891,11 +2891,11 @@ let AddedComplexity = 15 in {
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
(MOVLSD2PDrr (V_SET0), FR64:$src)>, Requires<[HasSSE2]>;
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
(MOVLSS2PSrr (V_SET0), FR32:$src)>, Requires<[HasSSE2]>;
(MOVLSS2PSrr (V_SET0), FR32:$src)>, Requires<[HasSSE1]>;
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
(MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE2]>;
(MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE1]>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
(MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE2]>;
(MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE1]>;
}
// Splat v2f64 / v2i64

View File

@ -0,0 +1,22 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,-sse2
define <4 x float> @f(float %w) nounwind {
entry:
%retval = alloca <4 x float> ; <<4 x float>*> [#uses=2]
%w.addr = alloca float ; <float*> [#uses=2]
%.compoundliteral = alloca <4 x float> ; <<4 x float>*> [#uses=2]
store float %w, float* %w.addr
%tmp = load float* %w.addr ; <float> [#uses=1]
%0 = insertelement <4 x float> undef, float %tmp, i32 0 ; <<4 x float>> [#uses=1]
%1 = insertelement <4 x float> %0, float 0.000000e+00, i32 1 ; <<4 x float>> [#uses=1]
%2 = insertelement <4 x float> %1, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1]
%3 = insertelement <4 x float> %2, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=1]
store <4 x float> %3, <4 x float>* %.compoundliteral
%tmp1 = load <4 x float>* %.compoundliteral ; <<4 x float>> [#uses=1]
store <4 x float> %tmp1, <4 x float>* %retval
br label %return
return: ; preds = %entry
%4 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %4
}