1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00
llvm-mirror/test/CodeGen/X86/vec_shift2.ll
Sanjay Patel 4a6def05d5 [x86] favor vector constant load to avoid GPR to XMM transfer, part 2
This replaces the build_vector lowering code that was just added in
D80013
and matches the pattern later from the x86-specific "vzext_movl".
That seems to result in the same or better improvements and gets rid
of the 'TODO' items from that patch.

AFAICT, we always shrink wider constant vectors to 128-bit on these
patterns, so we still get the implicit zero-extension to ymm/zmm
without wasting space on larger vector constants. There's a trade-off
there because that means we miss potential load-folding.

Similarly, we could load scalar constants here with implicit
zero-extension even to 128-bit. That saves constant space, but it
means we forego load-folding, and so it increases register pressure.
This seems like a good middle-ground between those 2 options.

Differential Revision: https://reviews.llvm.org/D80131
2020-05-25 08:01:48 -04:00

42 lines
1.5 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
define <2 x i64> @t1(<2 x i64> %b1, <2 x i64> %c) nounwind {
; X32-LABEL: t1:
; X32: # %bb.0:
; X32-NEXT: psrlw $14, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: t1:
; X64: # %bb.0:
; X64-NEXT: psrlw $14, %xmm0
; X64-NEXT: retq
%tmp1 = bitcast <2 x i64> %b1 to <8 x i16>
%tmp2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w( <8 x i16> %tmp1, <8 x i16> bitcast (<4 x i32> < i32 14, i32 undef, i32 undef, i32 undef > to <8 x i16>) ) nounwind readnone
%tmp3 = bitcast <8 x i16> %tmp2 to <2 x i64>
ret <2 x i64> %tmp3
}
define <4 x i32> @t2(<2 x i64> %b1, <2 x i64> %c) nounwind {
; X32-LABEL: t2:
; X32: # %bb.0:
; X32-NEXT: movl $14, %eax
; X32-NEXT: movd %eax, %xmm1
; X32-NEXT: pslld %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: t2:
; X64: # %bb.0:
; X64-NEXT: movl $14, %eax
; X64-NEXT: movd %eax, %xmm1
; X64-NEXT: pslld %xmm1, %xmm0
; X64-NEXT: retq
%tmp1 = bitcast <2 x i64> %b1 to <4 x i32>
%tmp2 = tail call <4 x i32> @llvm.x86.sse2.psll.d( <4 x i32> %tmp1, <4 x i32> < i32 14, i32 undef, i32 undef, i32 undef > ) nounwind readnone
ret <4 x i32> %tmp2
}
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone