diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 8d677be9ea2..12b37874485 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -7588,11 +7588,10 @@ multiclass avx512_sqrt_scalar opc, string OpcodeStr,X86VectorVTInfo _, (_.VT _.RC:$src2), (i32 FROUND_CURRENT))>; defm m_Int : AVX512_maskable_scalar; defm rb_Int : AVX512_maskable_scalar(NAME#SUFF#Zm) (_.EltVT (IMPLICIT_DEF)), addr:$src)>; - def : Pat<(Intr (scalar_to_vector (_.EltVT (load addr:$src2)))), + def : Pat<(Intr _.ScalarIntMemCPat:$src2), (!cast(NAME#SUFF#Zm_Int) (_.VT (IMPLICIT_DEF)), addr:$src2)>; } diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index cb512848e41..dc1eb3e8963 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3040,6 +3040,7 @@ def SSE_RCPS : OpndItins< multiclass sse_fp_unop_s opc, string OpcodeStr, RegisterClass RC, ValueType vt, ValueType ScalarVT, X86MemOperand x86memop, + Operand intmemop, ComplexPattern int_cpat, Intrinsic Intr, SDNode OpNode, Domain d, OpndItins itins, Predicate target, string Suffix> { @@ -3060,7 +3061,7 @@ multiclass sse_fp_unop_s opc, string OpcodeStr, RegisterClass RC, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), []>, Sched<[itins.Sched.Folded, ReadAfterLd]>; let mayLoad = 1 in - def m_Int : I, Sched<[itins.Sched.Folded, ReadAfterLd]>; } @@ -3080,7 +3081,7 @@ multiclass sse_fp_unop_s opc, string OpcodeStr, RegisterClass RC, // which has a clobber before the rcp, vs. // rcpss mem, %xmm0 let Predicates = [target, OptForSize] in { - def : Pat<(Intr (scalar_to_vector (ScalarVT (load addr:$src2)))), + def : Pat<(Intr int_cpat:$src2), (!cast(NAME#Suffix##m_Int) (vt (IMPLICIT_DEF)), addr:$src2)>; } @@ -3089,6 +3090,7 @@ multiclass sse_fp_unop_s opc, string OpcodeStr, RegisterClass RC, multiclass avx_fp_unop_s opc, string OpcodeStr, RegisterClass RC, ValueType vt, ValueType ScalarVT, X86MemOperand x86memop, + Operand intmemop, ComplexPattern int_cpat, Intrinsic Intr, SDNode OpNode, Domain d, OpndItins itins, Predicate target, string Suffix> { let hasSideEffects = 0 in { @@ -3106,7 +3108,7 @@ multiclass avx_fp_unop_s opc, string OpcodeStr, RegisterClass RC, []>, Sched<[itins.Sched.Folded]>; let mayLoad = 1 in def m_Int : I, Sched<[itins.Sched.Folded, ReadAfterLd]>; } @@ -3129,7 +3131,7 @@ multiclass avx_fp_unop_s opc, string OpcodeStr, RegisterClass RC, VR128:$src)>; } let Predicates = [target, OptForSize] in { - def : Pat<(Intr (scalar_to_vector (ScalarVT (load addr:$src2)))), + def : Pat<(Intr int_cpat:$src2), (!cast("V"#NAME#Suffix##m_Int) (vt (IMPLICIT_DEF)), addr:$src2)>; def : Pat<(ScalarVT (OpNode (load addr:$src))), @@ -3213,10 +3215,11 @@ let Predicates = [HasAVX, NoVLX] in { multiclass sse1_fp_unop_s opc, string OpcodeStr, SDNode OpNode, OpndItins itins, Predicate AVXTarget> { defm SS : sse_fp_unop_s("int_x86_sse_"##OpcodeStr##_ss), OpNode, SSEPackedSingle, itins, UseSSE1, "SS">, XS; defm V#NAME#SS : avx_fp_unop_s("int_x86_sse_"##OpcodeStr##_ss), OpNode, SSEPackedSingle, itins, AVXTarget, "SS">, XS, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable; @@ -3225,10 +3228,11 @@ multiclass sse1_fp_unop_s opc, string OpcodeStr, SDNode OpNode, multiclass sse2_fp_unop_s opc, string OpcodeStr, SDNode OpNode, OpndItins itins, Predicate AVXTarget> { defm SD : sse_fp_unop_s("int_x86_sse2_"##OpcodeStr##_sd), OpNode, SSEPackedDouble, itins, UseSSE2, "SD">, XD; defm V#NAME#SD : avx_fp_unop_s("int_x86_sse2_"##OpcodeStr##_sd), OpNode, SSEPackedDouble, itins, AVXTarget, "SD">, XD, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable; diff --git a/test/CodeGen/X86/fold-load-unops.ll b/test/CodeGen/X86/fold-load-unops.ll index 04689b0836f..bf47c633c35 100644 --- a/test/CodeGen/X86/fold-load-unops.ll +++ b/test/CodeGen/X86/fold-load-unops.ll @@ -101,14 +101,12 @@ define float @rcpss_size(float* %a) optsize { define <4 x float> @rcpss_full_size(<4 x float>* %a) optsize { ; SSE-LABEL: rcpss_full_size: ; SSE: # BB#0: -; SSE-NEXT: movaps (%rdi), %xmm0 -; SSE-NEXT: rcpss %xmm0, %xmm0 +; SSE-NEXT: rcpss (%rdi), %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: rcpss_full_size: ; AVX: # BB#0: -; AVX-NEXT: vmovaps (%rdi), %xmm0 -; AVX-NEXT: vrcpss %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vrcpss (%rdi), %xmm0, %xmm0 ; AVX-NEXT: retq %ld = load <4 x float>, <4 x float>* %a %res = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %ld) @@ -135,14 +133,12 @@ define float @rsqrtss_size(float* %a) optsize { define <4 x float> @rsqrtss_full_size(<4 x float>* %a) optsize { ; SSE-LABEL: rsqrtss_full_size: ; SSE: # BB#0: -; SSE-NEXT: movaps (%rdi), %xmm0 -; SSE-NEXT: rsqrtss %xmm0, %xmm0 +; SSE-NEXT: rsqrtss (%rdi), %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: rsqrtss_full_size: ; AVX: # BB#0: -; AVX-NEXT: vmovaps (%rdi), %xmm0 -; AVX-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vrsqrtss (%rdi), %xmm0, %xmm0 ; AVX-NEXT: retq %ld = load <4 x float>, <4 x float>* %a %res = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %ld) @@ -169,14 +165,12 @@ define float @sqrtss_size(float* %a) optsize{ define <4 x float> @sqrtss_full_size(<4 x float>* %a) optsize{ ; SSE-LABEL: sqrtss_full_size: ; SSE: # BB#0: -; SSE-NEXT: movaps (%rdi), %xmm0 -; SSE-NEXT: sqrtss %xmm0, %xmm0 +; SSE-NEXT: sqrtss (%rdi), %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: sqrtss_full_size: ; AVX: # BB#0: -; AVX-NEXT: vmovaps (%rdi), %xmm0 -; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vsqrtss (%rdi), %xmm0, %xmm0 ; AVX-NEXT: retq %ld = load <4 x float>, <4 x float>* %a %res = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %ld) @@ -203,14 +197,12 @@ define double @sqrtsd_size(double* %a) optsize { define <2 x double> @sqrtsd_full_size(<2 x double>* %a) optsize { ; SSE-LABEL: sqrtsd_full_size: ; SSE: # BB#0: -; SSE-NEXT: movapd (%rdi), %xmm0 -; SSE-NEXT: sqrtsd %xmm0, %xmm0 +; SSE-NEXT: sqrtsd (%rdi), %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: sqrtsd_full_size: ; AVX: # BB#0: -; AVX-NEXT: vmovapd (%rdi), %xmm0 -; AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vsqrtsd (%rdi), %xmm0, %xmm0 ; AVX-NEXT: retq %ld = load <2 x double>, <2 x double>* %a %res = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %ld)