1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-21 03:53:04 +02:00

[X86] Add patterns to use VMOVSS/SD zero masking for scalar f32/f64 select with zero.

These showed up in some of the upgraded FMA code. We really need to improve these test cases more, but this helps for now.

llvm-svn: 336875
This commit is contained in:
Craig Topper 2018-07-12 00:54:40 +00:00
parent 40027cf33a
commit 89788bf942
3 changed files with 20 additions and 18 deletions

View File

@ -4237,11 +4237,19 @@ def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
(COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>;
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
(COPY_TO_REGCLASS (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
(COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>;
def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
(COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
(COPY_TO_REGCLASS FR64X:$src1, VR128X)), FR64X)>;
def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fpimm0)),
(COPY_TO_REGCLASS (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
(COPY_TO_REGCLASS FR64X:$src1, VR128X)), FR64X)>;
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2),

View File

@ -9083,9 +9083,8 @@ define void @fmadd_ss_maskz_memfold(float* %a, float* %b, i8 %c) {
; X86-NEXT: vfmadd231ss (%ecx), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xb9,0x01]
; X86-NEXT: ## xmm0 = (xmm0 * mem) + xmm0
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
; X86-NEXT: vmovss %xmm0, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x10,0xc8]
; X86-NEXT: vmovss %xmm1, (%edx) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x0a]
; X86-NEXT: vmovss %xmm0, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x10,0xc0]
; X86-NEXT: vmovss %xmm0, (%edx) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x02]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: fmadd_ss_maskz_memfold:
@ -9095,9 +9094,8 @@ define void @fmadd_ss_maskz_memfold(float* %a, float* %b, i8 %c) {
; X64-NEXT: vfmadd231ss (%rsi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xb9,0x06]
; X64-NEXT: ## xmm0 = (xmm0 * mem) + xmm0
; X64-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
; X64-NEXT: vmovss %xmm0, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x10,0xc8]
; X64-NEXT: vmovss %xmm1, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x0f]
; X64-NEXT: vmovss %xmm0, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x10,0xc0]
; X64-NEXT: vmovss %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%a.val = load float, float* %a
%av0 = insertelement <4 x float> undef, float %a.val, i32 0
@ -9173,9 +9171,8 @@ define void @fmadd_sd_maskz_memfold(double* %a, double* %b, i8 %c) {
; X86-NEXT: vfmadd231sd (%ecx), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xb9,0x01]
; X86-NEXT: ## xmm0 = (xmm0 * mem) + xmm0
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0xc9]
; X86-NEXT: vmovsd %xmm0, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x10,0xc8]
; X86-NEXT: vmovsd %xmm1, (%edx) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x0a]
; X86-NEXT: vmovsd %xmm0, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x10,0xc0]
; X86-NEXT: vmovsd %xmm0, (%edx) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x02]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: fmadd_sd_maskz_memfold:
@ -9185,9 +9182,8 @@ define void @fmadd_sd_maskz_memfold(double* %a, double* %b, i8 %c) {
; X64-NEXT: vfmadd231sd (%rsi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xb9,0x06]
; X64-NEXT: ## xmm0 = (xmm0 * mem) + xmm0
; X64-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0xc9]
; X64-NEXT: vmovsd %xmm0, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x10,0xc8]
; X64-NEXT: vmovsd %xmm1, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x0f]
; X64-NEXT: vmovsd %xmm0, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x10,0xc0]
; X64-NEXT: vmovsd %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%a.val = load double, double* %a
%av0 = insertelement <2 x double> undef, double %a.val, i32 0

View File

@ -4629,9 +4629,8 @@ define void @fmadd_ss_maskz_memfold(float* %a, float* %b, i8 %c) {
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; CHECK-NEXT: kmovw %edx, %k1
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vmovss %xmm0, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovss %xmm1, (%rdi)
; CHECK-NEXT: vmovss %xmm0, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vmovss %xmm0, (%rdi)
; CHECK-NEXT: retq
%a.val = load float, float* %a
%av0 = insertelement <4 x float> undef, float %a.val, i32 0
@ -4693,9 +4692,8 @@ define void @fmadd_sd_maskz_memfold(double* %a, double* %b, i8 %c) {
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; CHECK-NEXT: kmovw %edx, %k1
; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vmovsd %xmm0, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovsd %xmm1, (%rdi)
; CHECK-NEXT: vmovsd %xmm0, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vmovsd %xmm0, (%rdi)
; CHECK-NEXT: retq
%a.val = load double, double* %a
%av0 = insertelement <2 x double> undef, double %a.val, i32 0