mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
[X86] Use 128-bit blends instead of vmovss/vmovsd for 512-bit vzmovl patterns to match AVX.
llvm-svn: 337135
This commit is contained in:
parent
e4e5b712ac
commit
7377fe97fa
@ -4348,9 +4348,7 @@ let Predicates = [HasAVX512, OptForSize] in {
|
|||||||
def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
|
def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
|
||||||
(SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
|
(SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
|
||||||
(EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
|
(EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
|
||||||
}
|
|
||||||
|
|
||||||
let Predicates = [HasAVX512] in {
|
|
||||||
def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
|
def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
|
||||||
(SUBREG_TO_REG (i32 0),
|
(SUBREG_TO_REG (i32 0),
|
||||||
(VMOVSSZrr (v4f32 (AVX512_128_SET0)),
|
(VMOVSSZrr (v4f32 (AVX512_128_SET0)),
|
||||||
@ -4360,6 +4358,45 @@ let Predicates = [HasAVX512] in {
|
|||||||
(VMOVSSZrr (v4i32 (AVX512_128_SET0)),
|
(VMOVSSZrr (v4i32 (AVX512_128_SET0)),
|
||||||
(EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)), sub_xmm)>;
|
(EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)), sub_xmm)>;
|
||||||
|
|
||||||
|
def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
|
||||||
|
(SUBREG_TO_REG (i32 0),
|
||||||
|
(VMOVSDZrr (v2f64 (AVX512_128_SET0)),
|
||||||
|
(EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)), sub_xmm)>;
|
||||||
|
|
||||||
|
def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
|
||||||
|
(SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
|
||||||
|
(EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)), sub_xmm)>;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
|
||||||
|
// VMOVSS/SD. Unfortunately, this loses the ability to use XMM16-31.
|
||||||
|
let Predicates = [HasAVX512, OptForSpeed] in {
|
||||||
|
def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
|
||||||
|
(SUBREG_TO_REG (i32 0),
|
||||||
|
(VBLENDPSrri (v4f32 (V_SET0)),
|
||||||
|
(EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm),
|
||||||
|
(i8 1)), sub_xmm)>;
|
||||||
|
def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
|
||||||
|
(SUBREG_TO_REG (i32 0),
|
||||||
|
(VPBLENDWrri (v4i32 (V_SET0)),
|
||||||
|
(EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm),
|
||||||
|
(i8 3)), sub_xmm)>;
|
||||||
|
|
||||||
|
def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
|
||||||
|
(SUBREG_TO_REG (i32 0),
|
||||||
|
(VBLENDPDrri (v2f64 (V_SET0)),
|
||||||
|
(EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm),
|
||||||
|
(i8 1)), sub_xmm)>;
|
||||||
|
def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
|
||||||
|
(SUBREG_TO_REG (i32 0),
|
||||||
|
(VPBLENDWrri (v2i64 (V_SET0)),
|
||||||
|
(EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm),
|
||||||
|
(i8 0xf)), sub_xmm)>;
|
||||||
|
}
|
||||||
|
|
||||||
|
let Predicates = [HasAVX512] in {
|
||||||
|
|
||||||
// MOVSSrm zeros the high parts of the register; represent this
|
// MOVSSrm zeros the high parts of the register; represent this
|
||||||
// with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
|
// with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
|
||||||
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
|
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
|
||||||
@ -4416,16 +4453,6 @@ let Predicates = [HasAVX512] in {
|
|||||||
(v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
|
(v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
|
||||||
(SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
|
(SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
|
||||||
|
|
||||||
// Move low f64 and clear high bits.
|
|
||||||
def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
|
|
||||||
(SUBREG_TO_REG (i32 0),
|
|
||||||
(VMOVSDZrr (v2f64 (AVX512_128_SET0)),
|
|
||||||
(EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)), sub_xmm)>;
|
|
||||||
|
|
||||||
def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
|
|
||||||
(SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
|
|
||||||
(EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)), sub_xmm)>;
|
|
||||||
|
|
||||||
// Extract and store.
|
// Extract and store.
|
||||||
def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
|
def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
|
||||||
addr:$dst),
|
addr:$dst),
|
||||||
|
@ -388,7 +388,7 @@ define <16 x i32> @shuffle_v16i32_0zzzzzzzzzzzzzzz(<16 x i32> %a) {
|
|||||||
; ALL-LABEL: shuffle_v16i32_0zzzzzzzzzzzzzzz:
|
; ALL-LABEL: shuffle_v16i32_0zzzzzzzzzzzzzzz:
|
||||||
; ALL: # %bb.0:
|
; ALL: # %bb.0:
|
||||||
; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||||
; ALL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
; ALL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||||
; ALL-NEXT: retq
|
; ALL-NEXT: retq
|
||||||
%shuffle = shufflevector <16 x i32> %a, <16 x i32> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
|
%shuffle = shufflevector <16 x i32> %a, <16 x i32> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
|
||||||
ret <16 x i32> %shuffle
|
ret <16 x i32> %shuffle
|
||||||
@ -398,7 +398,7 @@ define <16 x float> @shuffle_v16f32_0zzzzzzzzzzzzzzz(<16 x float> %a) {
|
|||||||
; ALL-LABEL: shuffle_v16f32_0zzzzzzzzzzzzzzz:
|
; ALL-LABEL: shuffle_v16f32_0zzzzzzzzzzzzzzz:
|
||||||
; ALL: # %bb.0:
|
; ALL: # %bb.0:
|
||||||
; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||||
; ALL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
; ALL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||||
; ALL-NEXT: retq
|
; ALL-NEXT: retq
|
||||||
%shuffle = shufflevector <16 x float> %a, <16 x float> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
|
%shuffle = shufflevector <16 x float> %a, <16 x float> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
|
||||||
ret <16 x float> %shuffle
|
ret <16 x float> %shuffle
|
||||||
|
@ -1969,8 +1969,8 @@ define <8 x double> @shuffle_v8f64_uuu2301(<8 x double> %a0, <8 x double> %a1) {
|
|||||||
define <8 x i64> @shuffle_v8i64_0zzzzzzz(<8 x i64> %a) {
|
define <8 x i64> @shuffle_v8i64_0zzzzzzz(<8 x i64> %a) {
|
||||||
; ALL-LABEL: shuffle_v8i64_0zzzzzzz:
|
; ALL-LABEL: shuffle_v8i64_0zzzzzzz:
|
||||||
; ALL: # %bb.0:
|
; ALL: # %bb.0:
|
||||||
; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
|
; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||||
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
; ALL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
|
||||||
; ALL-NEXT: ret{{[l|q]}}
|
; ALL-NEXT: ret{{[l|q]}}
|
||||||
%shuffle = shufflevector <8 x i64> %a, <8 x i64> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
|
%shuffle = shufflevector <8 x i64> %a, <8 x i64> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
|
||||||
ret <8 x i64> %shuffle
|
ret <8 x i64> %shuffle
|
||||||
@ -1979,8 +1979,8 @@ define <8 x i64> @shuffle_v8i64_0zzzzzzz(<8 x i64> %a) {
|
|||||||
define <8 x double> @shuffle_v8f64_0zzzzzzz(<8 x double> %a) {
|
define <8 x double> @shuffle_v8f64_0zzzzzzz(<8 x double> %a) {
|
||||||
; ALL-LABEL: shuffle_v8f64_0zzzzzzz:
|
; ALL-LABEL: shuffle_v8f64_0zzzzzzz:
|
||||||
; ALL: # %bb.0:
|
; ALL: # %bb.0:
|
||||||
; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
|
; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||||
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
; ALL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
|
||||||
; ALL-NEXT: ret{{[l|q]}}
|
; ALL-NEXT: ret{{[l|q]}}
|
||||||
%shuffle = shufflevector <8 x double> %a, <8 x double> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
|
%shuffle = shufflevector <8 x double> %a, <8 x double> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
|
||||||
ret <8 x double> %shuffle
|
ret <8 x double> %shuffle
|
||||||
|
Loading…
x
Reference in New Issue
Block a user