Certain patterns involving the "movss" instruction were marked as requiring SSE2, when in reality movss is an SSE1 instruction.

llvm-svn: 57246
2024-11-24 03:33:20 +01:00 · 2008-10-07 16:14:11 +00:00 · 2008-10-07 16:14:11 +00:00 · a9c42526f8
commit a9c42526f8
parent 7127e22321
2 changed files with 25 additions and 3 deletions
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@ -2891,11 +2891,11 @@ let AddedComplexity = 15 in {
 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
          (MOVLSD2PDrr (V_SET0), FR64:$src)>, Requires<[HasSSE2]>;
 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
-          (MOVLSS2PSrr (V_SET0), FR32:$src)>, Requires<[HasSSE2]>;
+          (MOVLSS2PSrr (V_SET0), FR32:$src)>, Requires<[HasSSE1]>;
 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
-          (MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE2]>;
+          (MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE1]>;
 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
-          (MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE2]>;
+          (MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE1]>;
 }

 // Splat v2f64 / v2i64
--- a/test/CodeGen/X86/2008-10-07-SSEISelBug.ll
+++ b/test/CodeGen/X86/2008-10-07-SSEISelBug.ll
@ -0,0 +1,22 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,-sse2
+
+define <4 x float> @f(float %w) nounwind {
+entry:
+	%retval = alloca <4 x float>		; <<4 x float>*> [#uses=2]
+	%w.addr = alloca float		; <float*> [#uses=2]
+	%.compoundliteral = alloca <4 x float>		; <<4 x float>*> [#uses=2]
+	store float %w, float* %w.addr
+	%tmp = load float* %w.addr		; <float> [#uses=1]
+	%0 = insertelement <4 x float> undef, float %tmp, i32 0		; <<4 x float>> [#uses=1]
+	%1 = insertelement <4 x float> %0, float 0.000000e+00, i32 1		; <<4 x float>> [#uses=1]
+	%2 = insertelement <4 x float> %1, float 0.000000e+00, i32 2		; <<4 x float>> [#uses=1]
+	%3 = insertelement <4 x float> %2, float 0.000000e+00, i32 3		; <<4 x float>> [#uses=1]
+	store <4 x float> %3, <4 x float>* %.compoundliteral
+	%tmp1 = load <4 x float>* %.compoundliteral		; <<4 x float>> [#uses=1]
+	store <4 x float> %tmp1, <4 x float>* %retval
+	br label %return
+
+return:		; preds = %entry
+	%4 = load <4 x float>* %retval		; <<4 x float>> [#uses=1]
+	ret <4 x float> %4
+}