From d115a77d3024ae960cdb71de91911f03107f1f9f Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Sat, 16 May 2015 01:02:12 +0000 Subject: [PATCH] [PPC64] Add vector pack/unpack support from ISA 2.07 This patch adds support for the following new instructions in the Power ISA 2.07: vpksdss vpksdus vpkudus vpkudum vupkhsw vupklsw These instructions are available through the vec_packs, vec_packsu, vec_unpackh, and vec_unpackl built-in interfaces. These are lane-sensitive instructions, so the built-ins have different implementations for big- and little-endian, and the instructions must be marked as killing the vector swap optimization for now. The first three instructions perform saturating pack operations. The fourth performs a modulo pack operation, which means it can be represented with a vector shuffle, and conversely the appropriate vector shuffles may cause this instruction to be generated. The other instructions are only generated via built-in support for now. Appropriate tests have been added. There is a companion patch to clang for the rest of this support. 
llvm-svn: 237499 --- include/llvm/IR/IntrinsicsPowerPC.td | 14 ++++++ lib/Target/PowerPC/PPCISelLowering.cpp | 49 ++++++++++++++++++- lib/Target/PowerPC/PPCISelLowering.h | 5 ++ lib/Target/PowerPC/PPCInstrAltivec.td | 35 +++++++++++++ lib/Target/PowerPC/PPCVSXSwapRemoval.cpp | 6 +++ test/CodeGen/PowerPC/vec_shuffle_p8vector.ll | 43 ++++++++++++++++ .../PowerPC/vec_shuffle_p8vector_le.ll | 43 ++++++++++++++++ .../PowerPC/ppc64-encoding-p8vector.txt | 19 +++++++ test/MC/PowerPC/ppc64-encoding-p8vector.s | 26 ++++++++++ 9 files changed, 238 insertions(+), 2 deletions(-) create mode 100644 test/CodeGen/PowerPC/vec_shuffle_p8vector.ll create mode 100644 test/CodeGen/PowerPC/vec_shuffle_p8vector_le.ll create mode 100644 test/MC/Disassembler/PowerPC/ppc64-encoding-p8vector.txt create mode 100644 test/MC/PowerPC/ppc64-encoding-p8vector.s diff --git a/include/llvm/IR/IntrinsicsPowerPC.td b/include/llvm/IR/IntrinsicsPowerPC.td index 947a7636a0e..29da61e0e09 100644 --- a/include/llvm/IR/IntrinsicsPowerPC.td +++ b/include/llvm/IR/IntrinsicsPowerPC.td @@ -480,6 +480,12 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". def int_ppc_altivec_vpkswus : GCCBuiltin<"__builtin_altivec_vpkswus">, Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + def int_ppc_altivec_vpksdss : GCCBuiltin<"__builtin_altivec_vpksdss">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_ppc_altivec_vpksdus : GCCBuiltin<"__builtin_altivec_vpksdus">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; // vpkuhum is lowered to a shuffle. def int_ppc_altivec_vpkuhus : GCCBuiltin<"__builtin_altivec_vpkuhus">, Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], @@ -488,6 +494,10 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". 
def int_ppc_altivec_vpkuwus : GCCBuiltin<"__builtin_altivec_vpkuwus">, Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + // vpkudum is lowered to a shuffle. + def int_ppc_altivec_vpkudus : GCCBuiltin<"__builtin_altivec_vpkudus">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; // Unpacks. def int_ppc_altivec_vupkhpx : GCCBuiltin<"__builtin_altivec_vupkhpx">, @@ -496,12 +506,16 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>; def int_ppc_altivec_vupkhsh : GCCBuiltin<"__builtin_altivec_vupkhsh">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; + def int_ppc_altivec_vupkhsw : GCCBuiltin<"__builtin_altivec_vupkhsw">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>; def int_ppc_altivec_vupklpx : GCCBuiltin<"__builtin_altivec_vupklpx">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; def int_ppc_altivec_vupklsb : GCCBuiltin<"__builtin_altivec_vupklsb">, Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>; def int_ppc_altivec_vupklsh : GCCBuiltin<"__builtin_altivec_vupklsh">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; + def int_ppc_altivec_vupklsw : GCCBuiltin<"__builtin_altivec_vupklsw">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>; // FP <-> integer conversion. diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 3ff77fc0161..5fc22e4af14 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1101,7 +1101,7 @@ static bool isConstantOrUndef(int Op, int Val) { /// VPKUHUM instruction. /// The ShuffleKind distinguishes between big-endian operations with /// two different inputs (0), either-endian operations with two identical -/// inputs (1), and little-endian operantion with two different inputs (2). +/// inputs (1), and little-endian operations with two different inputs (2). 
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { @@ -1132,7 +1132,7 @@ bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, /// VPKUWUM instruction. /// The ShuffleKind distinguishes between big-endian operations with /// two different inputs (0), either-endian operations with two identical -/// inputs (1), and little-endian operantion with two different inputs (2). +/// inputs (1), and little-endian operations with two different inputs (2). /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { @@ -1163,6 +1163,49 @@ bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, return true; } +/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a +/// VPKUDUM instruction. +/// The ShuffleKind distinguishes between big-endian operations with +/// two different inputs (0), either-endian operations with two identical +/// inputs (1), and little-endian operations with two different inputs (2). +/// For the latter, the input operands are swapped (see PPCInstrAltivec.td). 
+bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
+                               SelectionDAG &DAG) { // true iff N's mask fits the ShuffleKind layout
+  bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian(); // byte order from the data layout
+  if (ShuffleKind == 0) { // kind 0: big-endian, two distinct inputs
+    if (IsLE) // this form is never matched on little-endian
+      return false;
+    for (unsigned i = 0; i != 16; i += 4) // i walks the 16 mask elements in groups of four
+      if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) || // group at i must hold i*2+4 .. i*2+7,
+          !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) || // i.e. 4-7, 12-15, 20-23, 28-31;
+          !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) || // isConstantOrUndef is defined outside this
+          !isConstantOrUndef(N->getMaskElt(i+3), i*2+7)) // hunk - presumably it also accepts undef
+        return false;
+  } else if (ShuffleKind == 2) { // kind 2: little-endian, two distinct inputs
+    if (!IsLE) // this form is never matched on big-endian
+      return false;
+    for (unsigned i = 0; i != 16; i += 4) // same walk as kind 0, different expected indices
+      if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || // group at i must hold i*2 .. i*2+3,
+          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) || // i.e. 0-3, 8-11, 16-19, 24-27
+          !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
+          !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
+        return false;
+  } else if (ShuffleKind == 1) { // kind 1: either byte order, both inputs identical
+    unsigned j = IsLE ? 0 : 4; // first expected index: 0 on little-endian, 4 on big-endian
+    for (unsigned i = 0; i != 8; i += 4) // only two groups; each is checked in both mask halves
+      if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || // elements i .. i+3 must hold
+          !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || // i*2+j .. i*2+j+3
+          !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
+          !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
+          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || // elements i+8 .. i+11 must repeat
+          !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) || // exactly the same indices, so no
+          !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) || // expected index here exceeds 15
+          !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
+        return false;
+  }
+  return true; // also reached, unchecked, when ShuffleKind is not 0, 1 or 2
+} // NOTE(review): nothing in this body tests for ISA 2.07 support - confirm callers gate it
+
 /// isVMerge - Common function, used to match vmrg* shuffles.
/// static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, @@ -6993,6 +7036,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, PPC::isSplatShuffleMask(SVOp, 4) || PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) || PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) || + PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) || PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 || PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) || PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) || @@ -7010,6 +7054,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, unsigned int ShuffleKind = isLittleEndian ? 2 : 0; if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) || PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) || + PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) || PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 || PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) || PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) || diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 33cbb6e5196..81589c8307f 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -357,6 +357,11 @@ namespace llvm { bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG); + /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a + /// VPKUDUM instruction. + bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, + SelectionDAG &DAG); + /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for /// a VRGL* instruction with the specified unit size (1,2 or 4 bytes). 
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index d50b197415b..e77f75aa6c1 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -43,6 +43,10 @@ def vpkuwum_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG); }]>; +def vpkudum_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVPKUDUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG); +}]>; def vpkuhum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 1, *CurDAG); @@ -51,6 +55,10 @@ def vpkuwum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 1, *CurDAG); }]>; +def vpkudum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVPKUDUMShuffleMask(cast<ShuffleVectorSDNode>(N), 1, *CurDAG); +}]>; // These fragments are provided for little-endian, where the inputs must be // swapped for correct semantics. @@ -62,6 +70,10 @@ def vpkuwum_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 2, *CurDAG); }]>; +def vpkudum_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVPKUDUMShuffleMask(cast<ShuffleVectorSDNode>(N), 2, *CurDAG); +}]>; def vmrglb_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ @@ -1091,6 +1103,29 @@ def VPMSUMD : VX1_Int_Ty<1224, "vpmsumd", def VPERMXOR : VA1a_Int_Ty<45, "vpermxor", int_ppc_altivec_crypto_vpermxor, v16i8>; +// Vector doubleword integer pack and unpack. 
+def VPKSDSS : VX1_Int_Ty2<1486, "vpksdss", int_ppc_altivec_vpksdss, + v4i32, v2i64>; +def VPKSDUS : VX1_Int_Ty2<1358, "vpksdus", int_ppc_altivec_vpksdus, + v4i32, v2i64>; +def VPKUDUM : VXForm_1<1102, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vpkudum $vD, $vA, $vB", IIC_VecFP, + [(set v16i8:$vD, + (vpkudum_shuffle v16i8:$vA, v16i8:$vB))]>; +def VPKUDUS : VX1_Int_Ty2<1230, "vpkudus", int_ppc_altivec_vpkudus, + v4i32, v2i64>; +def VUPKHSW : VX2_Int_Ty2<1614, "vupkhsw", int_ppc_altivec_vupkhsw, + v2i64, v4i32>; +def VUPKLSW : VX2_Int_Ty2<1742, "vupklsw", int_ppc_altivec_vupklsw, + v2i64, v4i32>; + +// Shuffle patterns for unary and swapped (LE) vector pack modulo. +def:Pat<(vpkudum_unary_shuffle v16i8:$vA, undef), + (VPKUDUM $vA, $vA)>; +def:Pat<(vpkudum_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VPKUDUM $vB, $vA)>; + + } // end HasP8Altivec // Crypto instructions (from builtins) diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp index 6aa25ff6f8e..e238669145a 100644 --- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -382,8 +382,12 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { case PPC::VPKPX: case PPC::VPKSHSS: case PPC::VPKSHUS: + case PPC::VPKSDSS: + case PPC::VPKSDUS: case PPC::VPKSWSS: case PPC::VPKSWUS: + case PPC::VPKUDUM: + case PPC::VPKUDUS: case PPC::VPKUHUM: case PPC::VPKUHUS: case PPC::VPKUWUM: @@ -412,9 +416,11 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { case PPC::VUPKHPX: case PPC::VUPKHSB: case PPC::VUPKHSH: + case PPC::VUPKHSW: case PPC::VUPKLPX: case PPC::VUPKLSB: case PPC::VUPKLSH: + case PPC::VUPKLSW: case PPC::XXMRGHW: case PPC::XXMRGLW: case PPC::XXSPLTW: diff --git a/test/CodeGen/PowerPC/vec_shuffle_p8vector.ll b/test/CodeGen/PowerPC/vec_shuffle_p8vector.ll new file mode 100644 index 00000000000..d81aa729f8e --- /dev/null +++ b/test/CodeGen/PowerPC/vec_shuffle_p8vector.ll @@ -0,0 +1,43 @@ +; RUN: llc -mcpu=pwr8 
-mtriple=powerpc64-unknown-linux-gnu -mattr=+power8-vector < %s | FileCheck %s + +define void @VPKUDUM_unary(<2 x i64>* %A) { +entry: + %tmp = load <2 x i64>, <2 x i64>* %A + %tmp2 = bitcast <2 x i64> %tmp to <4 x i32> + %tmp3 = extractelement <4 x i32> %tmp2, i32 1 + %tmp4 = extractelement <4 x i32> %tmp2, i32 3 + %tmp5 = insertelement <4 x i32> undef, i32 %tmp3, i32 0 + %tmp6 = insertelement <4 x i32> %tmp5, i32 %tmp4, i32 1 + %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 2 + %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 3 + %tmp9 = bitcast <4 x i32> %tmp8 to <2 x i64> + store <2 x i64> %tmp9, <2 x i64>* %A + ret void +} + +; CHECK-LABEL: @VPKUDUM_unary +; CHECK-NOT: vperm +; CHECK: vpkudum + +define void @VPKUDUM(<2 x i64>* %A, <2 x i64>* %B) { +entry: + %tmp = load <2 x i64>, <2 x i64>* %A + %tmp2 = bitcast <2 x i64> %tmp to <4 x i32> + %tmp3 = load <2 x i64>, <2 x i64>* %B + %tmp4 = bitcast <2 x i64> %tmp3 to <4 x i32> + %tmp5 = extractelement <4 x i32> %tmp2, i32 1 + %tmp6 = extractelement <4 x i32> %tmp2, i32 3 + %tmp7 = extractelement <4 x i32> %tmp4, i32 1 + %tmp8 = extractelement <4 x i32> %tmp4, i32 3 + %tmp9 = insertelement <4 x i32> undef, i32 %tmp5, i32 0 + %tmp10 = insertelement <4 x i32> %tmp9, i32 %tmp6, i32 1 + %tmp11 = insertelement <4 x i32> %tmp10, i32 %tmp7, i32 2 + %tmp12 = insertelement <4 x i32> %tmp11, i32 %tmp8, i32 3 + %tmp13 = bitcast <4 x i32> %tmp12 to <2 x i64> + store <2 x i64> %tmp13, <2 x i64>* %A + ret void +} + +; CHECK-LABEL: @VPKUDUM +; CHECK-NOT: vperm +; CHECK: vpkudum diff --git a/test/CodeGen/PowerPC/vec_shuffle_p8vector_le.ll b/test/CodeGen/PowerPC/vec_shuffle_p8vector_le.ll new file mode 100644 index 00000000000..709388675f6 --- /dev/null +++ b/test/CodeGen/PowerPC/vec_shuffle_p8vector_le.ll @@ -0,0 +1,43 @@ +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -mattr=+power8-vector < %s | FileCheck %s + +define void @VPKUDUM_unary(<2 x i64>* %A) { +entry: + %tmp = load <2 x i64>, <2 x i64>* %A + 
%tmp2 = bitcast <2 x i64> %tmp to <4 x i32> + %tmp3 = extractelement <4 x i32> %tmp2, i32 0 + %tmp4 = extractelement <4 x i32> %tmp2, i32 2 + %tmp5 = insertelement <4 x i32> undef, i32 %tmp3, i32 0 + %tmp6 = insertelement <4 x i32> %tmp5, i32 %tmp4, i32 1 + %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 2 + %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 3 + %tmp9 = bitcast <4 x i32> %tmp8 to <2 x i64> + store <2 x i64> %tmp9, <2 x i64>* %A + ret void +} + +; CHECK-LABEL: @VPKUDUM_unary +; CHECK-NOT: vperm +; CHECK: vpkudum + +define void @VPKUDUM(<2 x i64>* %A, <2 x i64>* %B) { +entry: + %tmp = load <2 x i64>, <2 x i64>* %A + %tmp2 = bitcast <2 x i64> %tmp to <4 x i32> + %tmp3 = load <2 x i64>, <2 x i64>* %B + %tmp4 = bitcast <2 x i64> %tmp3 to <4 x i32> + %tmp5 = extractelement <4 x i32> %tmp2, i32 0 + %tmp6 = extractelement <4 x i32> %tmp2, i32 2 + %tmp7 = extractelement <4 x i32> %tmp4, i32 0 + %tmp8 = extractelement <4 x i32> %tmp4, i32 2 + %tmp9 = insertelement <4 x i32> undef, i32 %tmp5, i32 0 + %tmp10 = insertelement <4 x i32> %tmp9, i32 %tmp6, i32 1 + %tmp11 = insertelement <4 x i32> %tmp10, i32 %tmp7, i32 2 + %tmp12 = insertelement <4 x i32> %tmp11, i32 %tmp8, i32 3 + %tmp13 = bitcast <4 x i32> %tmp12 to <2 x i64> + store <2 x i64> %tmp13, <2 x i64>* %A + ret void +} + +; CHECK-LABEL: @VPKUDUM +; CHECK-NOT: vperm +; CHECK: vpkudum diff --git a/test/MC/Disassembler/PowerPC/ppc64-encoding-p8vector.txt b/test/MC/Disassembler/PowerPC/ppc64-encoding-p8vector.txt new file mode 100644 index 00000000000..eeea2db673f --- /dev/null +++ b/test/MC/Disassembler/PowerPC/ppc64-encoding-p8vector.txt @@ -0,0 +1,19 @@ +# RUN: llvm-mc --disassemble %s -triple powerpc64-unknown-unknown -mcpu=pwr8 | FileCheck %s + +# CHECK: vpksdss 2, 3, 4 +0x10 0x43 0x25 0xce + +# CHECK: vpksdus 2, 3, 4 +0x10 0x43 0x25 0x4e + +# CHECK: vpkudus 2, 3, 4 +0x10 0x43 0x24 0xce + +# CHECK: vpkudum 2, 3, 4 +0x10 0x43 0x24 0x4e + +# CHECK: vupkhsw 2, 3 +0x10 0x40 0x1e 0x4e + +# CHECK: 
vupklsw 2, 3 +0x10 0x40 0x1e 0xce diff --git a/test/MC/PowerPC/ppc64-encoding-p8vector.s b/test/MC/PowerPC/ppc64-encoding-p8vector.s new file mode 100644 index 00000000000..657b7378fb4 --- /dev/null +++ b/test/MC/PowerPC/ppc64-encoding-p8vector.s @@ -0,0 +1,26 @@ +# RUN: llvm-mc -triple powerpc64-unknown-unknown --show-encoding %s | FileCheck -check-prefix=CHECK-BE %s +# RUN: llvm-mc -triple powerpc64le-unknown-unknown --show-encoding %s | FileCheck -check-prefix=CHECK-LE %s + +# CHECK-BE: vpksdss 2, 3, 4 # encoding: [0x10,0x43,0x25,0xce] +# CHECK-LE: vpksdss 2, 3, 4 # encoding: [0xce,0x25,0x43,0x10] + vpksdss 2, 3, 4 + +# CHECK-BE: vpksdus 2, 3, 4 # encoding: [0x10,0x43,0x25,0x4e] +# CHECK-LE: vpksdus 2, 3, 4 # encoding: [0x4e,0x25,0x43,0x10] + vpksdus 2, 3, 4 + +# CHECK-BE: vpkudus 2, 3, 4 # encoding: [0x10,0x43,0x24,0xce] +# CHECK-LE: vpkudus 2, 3, 4 # encoding: [0xce,0x24,0x43,0x10] + vpkudus 2, 3, 4 + +# CHECK-BE: vpkudum 2, 3, 4 # encoding: [0x10,0x43,0x24,0x4e] +# CHECK-LE: vpkudum 2, 3, 4 # encoding: [0x4e,0x24,0x43,0x10] + vpkudum 2, 3, 4 + +# CHECK-BE: vupkhsw 2, 3 # encoding: [0x10,0x40,0x1e,0x4e] +# CHECK-LE: vupkhsw 2, 3 # encoding: [0x4e,0x1e,0x40,0x10] + vupkhsw 2, 3 + +# CHECK-BE: vupklsw 2, 3 # encoding: [0x10,0x40,0x1e,0xce] +# CHECK-LE: vupklsw 2, 3 # encoding: [0xce,0x1e,0x40,0x10] + vupklsw 2, 3