1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-21 20:12:56 +02:00
llvm-mirror/test/CodeGen/PowerPC/vec_shuffle_p8vector_le.ll
Bill Schmidt d115a77d30 [PPC64] Add vector pack/unpack support from ISA 2.07
This patch adds support for the following new instructions in the
Power ISA 2.07:

  vpksdss
  vpksdus
  vpkudus
  vpkudum
  vupkhsw
  vupklsw

These instructions are available through the vec_packs, vec_packsu,
vec_unpackh, and vec_unpackl built-in interfaces.  These are
lane-sensitive instructions, so the built-ins have different
implementations for big- and little-endian, and the instructions must
be marked as killing the vector swap optimization for now.

The first three instructions perform saturating pack operations.  The
fourth performs a modulo pack operation, which means it can be
represented with a vector shuffle, and conversely the appropriate
vector shuffles may cause this instruction to be generated.  The other
instructions are only generated via built-in support for now.

Appropriate tests have been added.

There is a companion patch to clang for the rest of this support.

llvm-svn: 237499
2015-05-16 01:02:12 +00:00

44 lines
1.5 KiB
LLVM

; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -mattr=+power8-vector < %s | FileCheck %s
; Single-input pack pattern: takes the even-numbered i32 lanes (0 and 2) of the
; vector stored at %A and writes back <lane0, lane2, lane0, lane2>.
; With the little-endian triple used by this test, i32 lanes 0 and 2 of the
; bitcast are the low halves of the two i64 lanes, i.e. a modulo pack of the
; doublewords with the same register supplying both operands.
define void @VPKUDUM_unary(<2 x i64>* %A) {
entry:
  ; Load the source and view its two doublewords as four words.
  %src.dw = load <2 x i64>, <2 x i64>* %A
  %src.w = bitcast <2 x i64> %src.dw to <4 x i32>

  ; Select the two words that survive the pack.
  %w0 = extractelement <4 x i32> %src.w, i32 0
  %w2 = extractelement <4 x i32> %src.w, i32 2

  ; Assemble the result one lane at a time: the selected pair fills lanes 0-1
  ; and is repeated in lanes 2-3.
  %res.l0 = insertelement <4 x i32> undef, i32 %w0, i32 0
  %res.l1 = insertelement <4 x i32> %res.l0, i32 %w2, i32 1
  %res.l2 = insertelement <4 x i32> %res.l1, i32 %w0, i32 2
  %res.l3 = insertelement <4 x i32> %res.l2, i32 %w2, i32 3

  ; Store the packed words back over the input, as doublewords again.
  %res.dw = bitcast <4 x i32> %res.l3 to <2 x i64>
  store <2 x i64> %res.dw, <2 x i64>* %A
  ret void
}
; CHECK-LABEL: @VPKUDUM_unary
; CHECK-NOT: vperm
; CHECK: vpkudum
; Two-input pack pattern: takes the even-numbered i32 lanes (0 and 2) of the
; vectors stored at %A and %B and writes <A.lane0, A.lane2, B.lane0, B.lane2>
; back to %A. %B is only read.
; With the little-endian triple used by this test, i32 lanes 0 and 2 of each
; bitcast are the low halves of that vector's two i64 lanes, i.e. a modulo
; pack of the doublewords of both inputs.
define void @VPKUDUM(<2 x i64>* %A, <2 x i64>* %B) {
entry:
  ; Load both sources and view each pair of doublewords as four words.
  %a.dw = load <2 x i64>, <2 x i64>* %A
  %a.w = bitcast <2 x i64> %a.dw to <4 x i32>
  %b.dw = load <2 x i64>, <2 x i64>* %B
  %b.w = bitcast <2 x i64> %b.dw to <4 x i32>

  ; Select the two words that survive the pack from each source.
  %a.w0 = extractelement <4 x i32> %a.w, i32 0
  %a.w2 = extractelement <4 x i32> %a.w, i32 2
  %b.w0 = extractelement <4 x i32> %b.w, i32 0
  %b.w2 = extractelement <4 x i32> %b.w, i32 2

  ; Assemble the result one lane at a time: %A's pair goes to lanes 0-1,
  ; %B's pair to lanes 2-3.
  %res.l0 = insertelement <4 x i32> undef, i32 %a.w0, i32 0
  %res.l1 = insertelement <4 x i32> %res.l0, i32 %a.w2, i32 1
  %res.l2 = insertelement <4 x i32> %res.l1, i32 %b.w0, i32 2
  %res.l3 = insertelement <4 x i32> %res.l2, i32 %b.w2, i32 3

  ; Store the packed words over the first input, as doublewords again.
  %res.dw = bitcast <4 x i32> %res.l3 to <2 x i64>
  store <2 x i64> %res.dw, <2 x i64>* %A
  ret void
}
; CHECK-LABEL: @VPKUDUM
; CHECK-NOT: vperm
; CHECK: vpkudum