mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[PPC] Implement vmrgew and vmrgow instructions
This patch adds support for the vector merge even word and vector merge odd word instructions introduced in POWER8. Phabricator review: http://reviews.llvm.org/D10704 llvm-svn: 240650
This commit is contained in:
parent
43fa3a4dea
commit
230223268c
@ -1279,6 +1279,99 @@ bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Common function used to match vmrgew and vmrgow shuffles
|
||||
*
|
||||
* The indexOffset determines whether to look for even or odd words in
|
||||
* the shuffle mask. This is based on the endianness of the target
|
||||
* machine.
|
||||
* - Little Endian:
|
||||
* - Use offset of 0 to check for odd elements
|
||||
* - Use offset of 4 to check for even elements
|
||||
* - Big Endian:
|
||||
* - Use offset of 0 to check for even elements
|
||||
* - Use offset of 4 to check for odd elements
|
||||
* A detailed description of the vector element ordering for little endian and
|
||||
* big endian can be found at <a
|
||||
* href="http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html">
|
||||
* Targeting your applications - what little endian and big endian IBM XL C/C++
|
||||
* compiler differences mean to you </a>
|
||||
*
|
||||
* The mask to the shuffle vector instruction specifies the indices of the
|
||||
* elements from the two input vectors to place in the result. The elements are
|
||||
* numbered in array-access order, starting with the first vector. These vectors
|
||||
* are always of type v16i8, thus each vector will contain 16 elements of size
|
||||
* 8. More info on the shuffle vector can be found in the <a
|
||||
* href="http://llvm.org/docs/LangRef.html#shufflevector-instruction">Language
|
||||
* Reference</a>.
|
||||
*
|
||||
* The RHSStartValue indicates whether the same input vectors are used (unary)
|
||||
* or two different input vectors are used, based on the following:
|
||||
* - If the instruction uses the same vector for both inputs, the range of the
|
||||
* indices will be 0 to 15. In this case, the RHSStart value passed should
|
||||
* be 0.
|
||||
* - If the instruction has two different vectors then the range of the
|
||||
* indices will be 0 to 31. In this case, the RHSStart value passed should
|
||||
* be 16 (indices 0-15 specify elements in the first vector while indices 16
|
||||
* to 31 specify elements in the second vector).
|
||||
*
|
||||
* \param[in] N The shuffle vector SD Node to analyze
|
||||
* \param[in] IndexOffset Specifies whether to look for even or odd elements
|
||||
* \param[in] RHSStartValue Specifies the starting index for the righthand input
|
||||
* vector to the shuffle_vector instruction
|
||||
* \return true iff this shuffle vector represents an even or odd word merge
|
||||
*/
|
||||
static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
                     unsigned RHSStartValue) {
  // Only byte-granular (v16i8) shuffles are matched here.
  if (N->getValueType(0) != MVT::v16i8)
    return false;

  const unsigned BytesPerWord = 4;
  const unsigned UpperHalf = 8; // byte distance between the two doublewords

  for (unsigned Word = 0; Word != 2; ++Word) {
    // Word 0 of each doubleword is expected to come from the first input,
    // word 1 from the input whose indices start at RHSStartValue.
    const unsigned SourceBase = Word * RHSStartValue + IndexOffset;

    for (unsigned Byte = 0; Byte != BytesPerWord; ++Byte) {
      const unsigned MaskIdx = Word * BytesPerWord + Byte;
      const unsigned Expected = SourceBase + Byte;

      // Both the low-doubleword entry and its counterpart eight bytes further
      // on must match the expected source byte (or be undef).
      const bool Matches =
          isConstantOrUndef(N->getMaskElt(MaskIdx), Expected) &&
          isConstantOrUndef(N->getMaskElt(MaskIdx + UpperHalf),
                            Expected + UpperHalf);
      if (!Matches)
        return false;
    }
  }

  return true;
}
|
||||
|
||||
/**
|
||||
* \brief Determine if the specified shuffle mask is suitable for the vmrgew or
|
||||
* vmrgow instructions.
|
||||
*
|
||||
* \param[in] N The shuffle vector SD Node to analyze
|
||||
* \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
|
||||
* \param[in] ShuffleKind Identify the type of merge:
|
||||
* - 0 = big-endian merge with two different inputs;
|
||||
* - 1 = either-endian merge with two identical inputs;
|
||||
* - 2 = little-endian merge with two different inputs (inputs are swapped for
|
||||
* little-endian merges).
|
||||
* \param[in] DAG The current SelectionDAG
|
||||
* \return true iff this shuffle mask
|
||||
*/
|
||||
bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
|
||||
unsigned ShuffleKind, SelectionDAG &DAG) {
|
||||
if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
|
||||
unsigned indexOffset = CheckEven ? 4 : 0;
|
||||
if (ShuffleKind == 1) // Unary
|
||||
return isVMerge(N, indexOffset, 0);
|
||||
else if (ShuffleKind == 2) // swapped
|
||||
return isVMerge(N, indexOffset, 16);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
else {
|
||||
unsigned indexOffset = CheckEven ? 0 : 4;
|
||||
if (ShuffleKind == 1) // Unary
|
||||
return isVMerge(N, indexOffset, 0);
|
||||
else if (ShuffleKind == 0) // Normal
|
||||
return isVMerge(N, indexOffset, 16);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
|
||||
/// amount, otherwise return -1.
|
||||
@ -7046,7 +7139,9 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
|
||||
PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
|
||||
PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
|
||||
PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
|
||||
PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG)) {
|
||||
PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
|
||||
PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
|
||||
PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)) {
|
||||
return Op;
|
||||
}
|
||||
}
|
||||
@ -7064,7 +7159,9 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
|
||||
PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
|
||||
PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
|
||||
PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
|
||||
PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG))
|
||||
PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
|
||||
PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
|
||||
PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))
|
||||
return Op;
|
||||
|
||||
// Check to see if this is a shuffle of 4-byte values. If so, we can use our
|
||||
|
@ -382,6 +382,11 @@ namespace llvm {
|
||||
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
|
||||
unsigned ShuffleKind, SelectionDAG &DAG);
|
||||
|
||||
/// isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for
|
||||
/// a VMRGEW or VMRGOW instruction
|
||||
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
|
||||
unsigned ShuffleKind, SelectionDAG &DAG);
|
||||
|
||||
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the
|
||||
/// shift amount, otherwise return -1.
|
||||
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
|
||||
|
@ -155,6 +155,33 @@ def vmrghw_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
}]>;
|
||||
|
||||
|
||||
def vmrgew_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), true, 0, *CurDAG);
|
||||
}]>;
|
||||
def vmrgow_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), false, 0, *CurDAG);
|
||||
}]>;
|
||||
def vmrgew_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), true, 1, *CurDAG);
|
||||
}]>;
|
||||
def vmrgow_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), false, 1, *CurDAG);
|
||||
}]>;
|
||||
def vmrgew_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), true, 2, *CurDAG);
|
||||
}]>;
|
||||
def vmrgow_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), false, 2, *CurDAG);
|
||||
}]>;
|
||||
|
||||
|
||||
|
||||
def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{
|
||||
return getI32Imm(PPC::isVSLDOIShuffleMask(N, 0, *CurDAG), SDLoc(N));
|
||||
}]>;
|
||||
@ -1008,6 +1035,29 @@ def VMINSD : VX1_Int_Ty<962, "vminsd", int_ppc_altivec_vminsd, v2i64>;
|
||||
def VMINUD : VX1_Int_Ty<706, "vminud", int_ppc_altivec_vminud, v2i64>;
|
||||
} // isCommutable
|
||||
|
||||
// Vector merge
|
||||
def VMRGEW : VXForm_1<1932, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
|
||||
"vmrgew $vD, $vA, $vB", IIC_VecFP,
|
||||
[(set v16i8:$vD, (vmrgew_shuffle v16i8:$vA, v16i8:$vB))]>;
|
||||
def VMRGOW : VXForm_1<1676, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
|
||||
"vmrgow $vD, $vA, $vB", IIC_VecFP,
|
||||
[(set v16i8:$vD, (vmrgow_shuffle v16i8:$vA, v16i8:$vB))]>;
|
||||
|
||||
// Match vmrgew(x,x) and vmrgow(x,x)
|
||||
def:Pat<(vmrgew_unary_shuffle v16i8:$vA, undef),
|
||||
(VMRGEW $vA, $vA)>;
|
||||
def:Pat<(vmrgow_unary_shuffle v16i8:$vA, undef),
|
||||
(VMRGOW $vA, $vA)>;
|
||||
|
||||
// Match vmrgew(y,x) and vmrgow(y,x), i.e., swapped operands. These fragments
|
||||
// are matched for little-endian, where the inputs must be swapped for correct
|
||||
// semantics.
|
||||
def:Pat<(vmrgew_swapped_shuffle v16i8:$vA, v16i8:$vB),
|
||||
(VMRGEW $vB, $vA)>;
|
||||
def:Pat<(vmrgow_swapped_shuffle v16i8:$vA, v16i8:$vB),
|
||||
(VMRGOW $vB, $vA)>;
|
||||
|
||||
|
||||
// Vector shifts
|
||||
def VRLD : VX1_Int_Ty<196, "vrld", int_ppc_altivec_vrld, v2i64>;
|
||||
def VSLD : VXForm_1<1476, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
|
||||
|
101
test/CodeGen/PowerPC/vec_mergeow.ll
Normal file
101
test/CodeGen/PowerPC/vec_mergeow.ll
Normal file
@ -0,0 +1,101 @@
|
||||
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | \
|
||||
; RUN: FileCheck %s -check-prefix=CHECK-LE
|
||||
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | \
|
||||
; RUN: FileCheck %s -check-prefix=CHECK-BE
|
||||
|
||||
; Check for a vector merge instruction using two inputs
|
||||
; The shufflevector specifies the even elements, using big endian element
|
||||
; ordering. If run on a big endian machine, this should produce the vmrgew
|
||||
; instruction. If run on a little endian machine, this should produce the
|
||||
; vmrgow instruction. Note also that on little endian the input registers
|
||||
; are swapped.
|
||||
define void @check_merge_even_xy(<16 x i8>* %A, <16 x i8>* %B) {
|
||||
entry:
|
||||
; CHECK-LE-LABEL: @check_merge_even_xy
|
||||
; CHECK-BE-LABEL: @check_merge_even_xy
|
||||
%tmp = load <16 x i8>, <16 x i8>* %A
|
||||
%tmp2 = load <16 x i8>, <16 x i8>* %B
|
||||
%tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2,
|
||||
<16 x i32> <i32 0, i32 1, i32 2, i32 3,
|
||||
i32 16, i32 17, i32 18, i32 19,
|
||||
i32 8, i32 9, i32 10, i32 11,
|
||||
i32 24, i32 25, i32 26, i32 27>
|
||||
; CHECK-LE: vmrgow 2, 3, 2
|
||||
; CHECK-BE: vmrgew 2, 2, 3
|
||||
store <16 x i8> %tmp3, <16 x i8>* %A
|
||||
ret void
|
||||
; CHECK-LE: blr
|
||||
; CHECK-BE: blr
|
||||
}
|
||||
|
||||
; Check for a vector merge instruction using a single input.
|
||||
; The shufflevector specifies the even elements, using big endian element
|
||||
; ordering. If run on a big endian machine, this should produce the vmrgew
|
||||
; instruction. If run on a little endian machine, this should produce the
|
||||
; vmrgow instruction.
|
||||
define void @check_merge_even_xx(<16 x i8>* %A) {
|
||||
entry:
|
||||
; CHECK-LE-LABEL: @check_merge_even_xx
|
||||
; CHECK-BE-LABEL: @check_merge_even_xx
|
||||
%tmp = load <16 x i8>, <16 x i8>* %A
|
||||
%tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp,
|
||||
<16 x i32> <i32 0, i32 1, i32 2, i32 3,
|
||||
i32 0, i32 1, i32 2, i32 3,
|
||||
i32 8, i32 9, i32 10, i32 11,
|
||||
i32 8, i32 9, i32 10, i32 11>
|
||||
; CHECK-LE: vmrgow 2, 2, 2
|
||||
; CHECK-BE: vmrgew 2, 2, 2
|
||||
store <16 x i8> %tmp2, <16 x i8>* %A
|
||||
ret void
|
||||
; CHECK-LE: blr
|
||||
; CHECK-BE: blr
|
||||
}
|
||||
|
||||
; Check for a vector merge instruction using two inputs.
|
||||
; The shufflevector specifies the odd elements, using big endian element
|
||||
; ordering. If run on a big endian machine, this should produce the vmrgow
|
||||
; instruction. If run on a little endian machine, this should produce the
|
||||
; vmrgew instruction. Note also that on little endian the input registers
|
||||
; are swapped.
|
||||
define void @check_merge_odd_xy(<16 x i8>* %A, <16 x i8>* %B) {
|
||||
entry:
|
||||
; CHECK-LE-LABEL: @check_merge_odd_xy
|
||||
; CHECK-BE-LABEL: @check_merge_odd_xy
|
||||
%tmp = load <16 x i8>, <16 x i8>* %A
|
||||
%tmp2 = load <16 x i8>, <16 x i8>* %B
|
||||
%tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2,
|
||||
<16 x i32> <i32 4, i32 5, i32 6, i32 7,
|
||||
i32 20, i32 21, i32 22, i32 23,
|
||||
i32 12, i32 13, i32 14, i32 15,
|
||||
i32 28, i32 29, i32 30, i32 31>
|
||||
; CHECK-LE: vmrgew 2, 3, 2
|
||||
; CHECK-BE: vmrgow 2, 2, 3
|
||||
store <16 x i8> %tmp3, <16 x i8>* %A
|
||||
ret void
|
||||
; CHECK-LE: blr
|
||||
; CHECK-BE: blr
|
||||
}
|
||||
|
||||
; Check for a vector merge instruction using a single input.
|
||||
; The shufflevector specifies the odd elements, using big endian element
|
||||
; ordering. If run on a big endian machine, this should produce the vmrgow
|
||||
; instruction. If run on a little endian machine, this should produce the
|
||||
; vmrgew instruction.
|
||||
define void @check_merge_odd_xx(<16 x i8>* %A) {
|
||||
entry:
|
||||
; CHECK-LE-LABEL: @check_merge_odd_xx
|
||||
; CHECK-BE-LABEL: @check_merge_odd_xx
|
||||
%tmp = load <16 x i8>, <16 x i8>* %A
|
||||
%tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp,
|
||||
<16 x i32> <i32 4, i32 5, i32 6, i32 7,
|
||||
i32 4, i32 5, i32 6, i32 7,
|
||||
i32 12, i32 13, i32 14, i32 15,
|
||||
i32 12, i32 13, i32 14, i32 15>
|
||||
; CHECK-LE: vmrgew 2, 2, 2
|
||||
; CHECK-BE: vmrgow 2, 2, 2
|
||||
store <16 x i8> %tmp2, <16 x i8>* %A
|
||||
ret void
|
||||
; CHECK-LE: blr
|
||||
; CHECK-BE: blr
|
||||
}
|
||||
|
@ -99,6 +99,12 @@
|
||||
# CHECK: vmrglw 2, 3, 4
|
||||
0x10 0x43 0x21 0x8c
|
||||
|
||||
# CHECK: vmrgew 2, 3, 4
|
||||
0x10 0x43 0x27 0x8c
|
||||
|
||||
# CHECK: vmrgow 2, 3, 4
|
||||
0x10 0x43 0x26 0x8c
|
||||
|
||||
# CHECK: vspltb 2, 3, 1
|
||||
0x10 0x41 0x1a 0x0c
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
|
||||
# RUN: llvm-mc -triple powerpc64-unknown-unknown --show-encoding %s | FileCheck -check-prefix=CHECK-BE %s
|
||||
# RUN: llvm-mc -triple powerpc64-unknown-unknown --show-encoding %s | FileCheck -check-prefix=CHECK-BE %s
|
||||
# RUN: llvm-mc -triple powerpc64le-unknown-unknown --show-encoding %s | FileCheck -check-prefix=CHECK-LE %s
|
||||
|
||||
# Vector facility
|
||||
@ -110,7 +110,13 @@
|
||||
# CHECK-BE: vmrglw 2, 3, 4 # encoding: [0x10,0x43,0x21,0x8c]
|
||||
# CHECK-LE: vmrglw 2, 3, 4 # encoding: [0x8c,0x21,0x43,0x10]
|
||||
vmrglw 2, 3, 4
|
||||
|
||||
# CHECK-BE: vmrgew 2, 3, 4 # encoding: [0x10,0x43,0x27,0x8c]
|
||||
# CHECK-LE: vmrgew 2, 3, 4 # encoding: [0x8c,0x27,0x43,0x10]
|
||||
vmrgew 2, 3, 4
|
||||
# CHECK-BE: vmrgow 2, 3, 4 # encoding: [0x10,0x43,0x26,0x8c]
|
||||
# CHECK-LE: vmrgow 2, 3, 4 # encoding: [0x8c,0x26,0x43,0x10]
|
||||
vmrgow 2, 3, 4
|
||||
|
||||
# CHECK-BE: vspltb 2, 3, 1 # encoding: [0x10,0x41,0x1a,0x0c]
|
||||
# CHECK-LE: vspltb 2, 3, 1 # encoding: [0x0c,0x1a,0x41,0x10]
|
||||
vspltb 2, 3, 1
|
||||
|
Loading…
Reference in New Issue
Block a user