1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 20:23:11 +01:00
llvm-mirror/test/CodeGen/X86/vec_shift4.ll
Nate Begeman 133820e806 Implement a vectorized algorithm for <16 x i8> << <16 x i8>
This is about 4x faster and smaller than the existing scalarization.

llvm-svn: 109566
2010-07-28 00:21:48 +00:00

26 lines
722 B
LLVM

; RUN: llc < %s -march=x86 -mattr=+sse41 | FileCheck %s
define <2 x i64> @shl1(<4 x i32> %r, <4 x i32> %a) nounwind readnone ssp {
entry:
; Variable left shift of <4 x i32>: every lane of %r is shifted by the
; matching lane of %a, and the result is returned reinterpreted as <2 x i64>.
;
; The first pattern below pins the expectations to this function's own label
; in the generated assembly; matching is by substring, so an
; underscore-prefixed label is accepted as well. From that point on, no scalar
; shll may appear before the first expected vector instruction, and the four
; vector instructions must then occur in the listed order.
; CHECK: shl1:
; CHECK-NOT: shll
; CHECK: pslld
; CHECK: paddd
; CHECK: cvttps2dq
; CHECK: pmulld
%shl = shl <4 x i32> %r, %a
; Bitcast only changes the type the 128 bits are viewed as.
%tmp2 = bitcast <4 x i32> %shl to <2 x i64>
ret <2 x i64> %tmp2
}
define <2 x i64> @shl2(<16 x i8> %r, <16 x i8> %a) nounwind readnone ssp {
entry:
; Variable left shift of <16 x i8>: every byte lane of %r is shifted by the
; matching lane of %a, and the result is returned reinterpreted as <2 x i64>.
;
; The first pattern below pins the expectations to this function's own label
; in the generated assembly, so the patterns that follow cannot be satisfied
; by, or measured from, output belonging to an earlier function. After the
; label, no scalar shlb may appear before the first blend, and three
; pblendvb instructions must be found in sequence.
; CHECK: shl2:
; CHECK-NOT: shlb
; CHECK: pblendvb
; CHECK: pblendvb
; CHECK: pblendvb
%shl = shl <16 x i8> %r, %a
; Bitcast only changes the type the 128 bits are viewed as.
%tmp2 = bitcast <16 x i8> %shl to <2 x i64>
ret <2 x i64> %tmp2
}