We currently have some isel patterns that treat vzmovl+load the same as vzload, but that shrinks the load, which we shouldn't do if the load is volatile. Rather than adding isel checks for volatile, this patch removes the patterns and teaches DAG combine to merge them into a vzload when it is legal to do so.

Differential Revision: https://reviews.llvm.org/D63665

llvm-svn: 364333
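For context, a minimal sketch of what such a DAG combine can look like, assuming the SelectionDAG APIs of this period. The helper name combineVZMovlLoad and its exact placement are hypothetical; X86ISD::VZEXT_MOVL and X86ISD::VZEXT_LOAD are the real node names, but this is an illustration of the approach, not the verbatim patch:

  // Hypothetical sketch: replace vzmovl(load x) with a single zero-extending
  // scalar load (vzload), but only when the load may legally be narrowed,
  // i.e. when it is not volatile.
  static SDValue combineVZMovlLoad(SDNode *N, SelectionDAG &DAG) {
    EVT VT = N->getValueType(0);
    SDValue Src = N->getOperand(0);

    // Only fold a plain, single-use full-vector load.
    if (!Src.hasOneUse() || !ISD::isNormalLoad(Src.getNode()))
      return SDValue();

    auto *LN = cast<LoadSDNode>(Src.getNode());
    // Volatile accesses must keep their original width; bail out.
    if (LN->isVolatile())
      return SDValue();

    // Build a vzload of just element 0; the remaining lanes are zero.
    SDVTList Tys = DAG.getVTList(VT, MVT::Other);
    SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
    SDValue VZLoad = DAG.getMemIntrinsicNode(
        X86ISD::VZEXT_LOAD, SDLoc(N), Tys, Ops, VT.getVectorElementType(),
        LN->getPointerInfo(), LN->getAlignment(), MachineMemOperand::MOLoad);

    // Re-route chain users of the original load to the new memory node.
    DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
    return VZLoad;
  }

The isVolatile() bail-out is the point of the change: the *_volatile tests below check that the fold stays off and the full-width load survives.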
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
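; A plain (non-volatile) <4 x i32> load whose other lanes are zeroed by the
; shuffle should merge into a single zero-extending scalar load (movss),
; with no separate full-width load or blend.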
define <4 x i32> @load_zmov_4i32_to_0zzz(<4 x i32> *%ptr) {
; SSE-LABEL: load_zmov_4i32_to_0zzz:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    retq
;
; AVX-LABEL: load_zmov_4i32_to_0zzz:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    retq
entry:
  %X = load <4 x i32>, <4 x i32>* %ptr
  %Y = shufflevector <4 x i32> %X, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
  ret <4 x i32> %Y
}

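; The same fold for <2 x i64>: a non-volatile load plus zero shuffle should
; become a single zero-extending movsd/vmovsd.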
define <2 x i64> @load_zmov_2i64_to_0z(<2 x i64> *%ptr) {
; SSE-LABEL: load_zmov_2i64_to_0z:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: load_zmov_2i64_to_0z:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    retq
entry:
  %X = load <2 x i64>, <2 x i64>* %ptr
  %Y = shufflevector <2 x i64> %X, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %Y
}

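; Volatile variant: the access must not be narrowed, so the full 128-bit
; load (movaps/vmovaps) must survive, followed by a move or blend against
; zero to clear the upper lanes.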
define <4 x i32> @load_zmov_4i32_to_0zzz_volatile(<4 x i32> *%ptr) {
; SSE2-LABEL: load_zmov_4i32_to_0zzz_volatile:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps (%rdi), %xmm1
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zmov_4i32_to_0zzz_volatile:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movaps (%rdi), %xmm1
; SSSE3-NEXT:    xorps %xmm0, %xmm0
; SSSE3-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zmov_4i32_to_0zzz_volatile:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movaps (%rdi), %xmm1
; SSE41-NEXT:    xorps %xmm0, %xmm0
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: load_zmov_4i32_to_0zzz_volatile:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps (%rdi), %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    retq
entry:
  %X = load volatile <4 x i32>, <4 x i32>* %ptr
  %Y = shufflevector <4 x i32> %X, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
  ret <4 x i32> %Y
}

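; Volatile <2 x i64> variant: again the full-width load is kept
; (movdqa/vmovdqa) and the upper element is zeroed by a separate movq.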
define <2 x i64> @load_zmov_2i64_to_0z_volatile(<2 x i64> *%ptr) {
; SSE-LABEL: load_zmov_2i64_to_0z_volatile:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: load_zmov_2i64_to_0z_volatile:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %xmm0
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT:    retq
entry:
  %X = load volatile <2 x i64>, <2 x i64>* %ptr
  %Y = shufflevector <2 x i64> %X, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %Y
}