mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[AVX-512] Add isel patterns to turn native masked scalar add/sub/mul/div into masked instructions.
llvm-svn: 290564
This commit is contained in:
parent
0a489c7ab3
commit
eb56be8c45
@ -9117,6 +9117,17 @@ multiclass AVX512_scalar_math_f32_patterns<SDNode Op, string OpcPrefix> {
|
||||
def : Pat<(v4f32 (X86Blendi (v4f32 VR128X:$dst),
|
||||
(Op (v4f32 VR128X:$dst), (v4f32 VR128X:$src)), (i8 1))),
|
||||
(!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst, v4f32:$src)>;
|
||||
|
||||
// extracted masked scalar math op with insert via movss
|
||||
def : Pat<(X86Movss (v4f32 VR128X:$src1),
|
||||
(scalar_to_vector
|
||||
(X86selects VK1WM:$mask,
|
||||
(Op (f32 (extractelt (v4f32 VR128X:$src1), (iPTR 0))),
|
||||
FR32X:$src2),
|
||||
FR32X:$src0))),
|
||||
(!cast<I>("V"#OpcPrefix#SSZrr_Intk) (COPY_TO_REGCLASS FR32X:$src0, VR128X),
|
||||
VK1WM:$mask, v4f32:$src1,
|
||||
(COPY_TO_REGCLASS FR32X:$src2, VR128X))>;
|
||||
}
|
||||
}
|
||||
|
||||
@ -9150,6 +9161,17 @@ multiclass AVX512_scalar_math_f64_patterns<SDNode Op, string OpcPrefix> {
|
||||
def : Pat<(v2f64 (X86Blendi (v2f64 VR128X:$dst),
|
||||
(Op (v2f64 VR128X:$dst), (v2f64 VR128X:$src)), (i8 1))),
|
||||
(!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst, v2f64:$src)>;
|
||||
|
||||
// extracted masked scalar math op with insert via movsd
|
||||
def : Pat<(X86Movsd (v2f64 VR128X:$src1),
|
||||
(scalar_to_vector
|
||||
(X86selects VK1WM:$mask,
|
||||
(Op (f64 (extractelt (v2f64 VR128X:$src1), (iPTR 0))),
|
||||
FR64X:$src2),
|
||||
FR64X:$src0))),
|
||||
(!cast<I>("V"#OpcPrefix#SDZrr_Intk) (COPY_TO_REGCLASS FR64X:$src0, VR128X),
|
||||
VK1WM:$mask, v2f64:$src1,
|
||||
(COPY_TO_REGCLASS FR64X:$src2, VR128X))>;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1119,9 +1119,9 @@ define <4 x float> @add_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c,
|
||||
;
|
||||
; AVX512-LABEL: add_ss_mask:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm1
|
||||
; AVX512-NEXT: andl $1, %edi
|
||||
; AVX512-NEXT: kmovw %edi, %k1
|
||||
; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm2 {%k1}
|
||||
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm2 {%k1}
|
||||
; AVX512-NEXT: vmovaps %xmm2, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%1 = extractelement <4 x float> %a, i64 0
|
||||
@ -1174,9 +1174,9 @@ define <2 x double> @add_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double>
|
||||
;
|
||||
; AVX512-LABEL: add_sd_mask:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm1
|
||||
; AVX512-NEXT: andl $1, %edi
|
||||
; AVX512-NEXT: kmovw %edi, %k1
|
||||
; AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm2 {%k1}
|
||||
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm2 {%k1}
|
||||
; AVX512-NEXT: vmovapd %xmm2, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%1 = extractelement <2 x double> %a, i64 0
|
||||
|
Loading…
Reference in New Issue
Block a user